Remove sync await points (#3036)

## Issue Addressed

Removes the await points in sync waiting for a processor response for rpc block processing. Built on top of #3029 
This also handles a couple of bugs in the previous code and adds a relatively comprehensive test suite.
This commit is contained in:
Divma 2022-03-23 01:09:39 +00:00
parent af50130e21
commit 788b6af3c4
13 changed files with 1722 additions and 727 deletions

View File

@ -38,9 +38,10 @@
//! checks the queues to see if there are more parcels of work that can be spawned in a new worker
//! task.
use crate::sync::manager::BlockProcessType;
use crate::{metrics, service::NetworkMessage, sync::SyncMessage};
use beacon_chain::parking_lot::Mutex;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError, GossipVerifiedBlock};
use beacon_chain::{BeaconChain, BeaconChainTypes, GossipVerifiedBlock};
use futures::stream::{Stream, StreamExt};
use futures::task::Poll;
use lighthouse_network::{
@ -57,7 +58,7 @@ use std::task::Context;
use std::time::Duration;
use std::{cmp, collections::HashSet};
use task_executor::TaskExecutor;
use tokio::sync::{mpsc, oneshot};
use tokio::sync::mpsc;
use types::{
Attestation, AttesterSlashing, Hash256, ProposerSlashing, SignedAggregateAndProof,
SignedBeaconBlock, SignedContributionAndProof, SignedVoluntaryExit, SubnetId,
@ -74,7 +75,7 @@ mod work_reprocessing_queue;
mod worker;
use crate::beacon_processor::work_reprocessing_queue::QueuedBlock;
pub use worker::{GossipAggregatePackage, GossipAttestationPackage, ProcessId};
pub use worker::{ChainSegmentProcessId, GossipAggregatePackage, GossipAttestationPackage};
/// The maximum size of the channel for work events to the `BeaconProcessor`.
///
@ -196,10 +197,6 @@ pub const BLOCKS_BY_ROOTS_REQUEST: &str = "blocks_by_roots_request";
pub const UNKNOWN_BLOCK_ATTESTATION: &str = "unknown_block_attestation";
pub const UNKNOWN_BLOCK_AGGREGATE: &str = "unknown_block_aggregate";
/// Used to send/receive results from a rpc block import in a blocking task.
pub type BlockResultSender<E> = oneshot::Sender<Result<Hash256, BlockError<E>>>;
pub type BlockResultReceiver<E> = oneshot::Receiver<Result<Hash256, BlockError<E>>>;
/// A simple first-in-first-out queue with a maximum length.
struct FifoQueue<T> {
queue: VecDeque<T>,
@ -496,18 +493,22 @@ impl<T: BeaconChainTypes> WorkEvent<T> {
/// sent to the other side of `result_tx`.
pub fn rpc_beacon_block(
block: Box<SignedBeaconBlock<T::EthSpec>>,
) -> (Self, BlockResultReceiver<T::EthSpec>) {
let (result_tx, result_rx) = oneshot::channel();
let event = Self {
seen_timestamp: Duration,
process_type: BlockProcessType,
) -> Self {
Self {
drop_during_sync: false,
work: Work::RpcBlock { block, result_tx },
};
(event, result_rx)
work: Work::RpcBlock {
block,
seen_timestamp,
process_type,
},
}
}
/// Create a new work event to import `blocks` as a beacon chain segment.
pub fn chain_segment(
process_id: ProcessId,
process_id: ChainSegmentProcessId,
blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
) -> Self {
Self {
@ -692,10 +693,11 @@ pub enum Work<T: BeaconChainTypes> {
},
RpcBlock {
block: Box<SignedBeaconBlock<T::EthSpec>>,
result_tx: BlockResultSender<T::EthSpec>,
seen_timestamp: Duration,
process_type: BlockProcessType,
},
ChainSegment {
process_id: ProcessId,
process_id: ChainSegmentProcessId,
blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
},
Status {
@ -1488,10 +1490,15 @@ impl<T: BeaconChainTypes> BeaconProcessor<T> {
/*
* Verification for beacon blocks received during syncing via RPC.
*/
Work::RpcBlock { block, result_tx } => {
Work::RpcBlock {
block,
seen_timestamp,
process_type,
} => {
worker.process_rpc_block(
*block,
result_tx,
seen_timestamp,
process_type,
work_reprocessing_tx.clone(),
duplicate_cache,
);

View File

@ -240,7 +240,13 @@ impl TestRig {
}
pub fn enqueue_rpc_block(&self) {
let (event, _rx) = WorkEvent::rpc_beacon_block(Box::new(self.next_block.clone()));
let event = WorkEvent::rpc_beacon_block(
Box::new(self.next_block.clone()),
std::time::Duration::default(),
BlockProcessType::ParentLookup {
chain_hash: Hash256::random(),
},
);
self.beacon_processor_tx.try_send(event).unwrap();
}

View File

@ -10,7 +10,7 @@ mod rpc_methods;
mod sync_methods;
pub use gossip_methods::{GossipAggregatePackage, GossipAttestationPackage};
pub use sync_methods::ProcessId;
pub use sync_methods::ChainSegmentProcessId;
pub(crate) const FUTURE_SLOT_TOLERANCE: u64 = 1;

View File

@ -1,26 +1,28 @@
use std::time::Duration;
use super::{super::work_reprocessing_queue::ReprocessQueueMessage, Worker};
use crate::beacon_processor::worker::FUTURE_SLOT_TOLERANCE;
use crate::beacon_processor::{BlockResultSender, DuplicateCache};
use crate::beacon_processor::DuplicateCache;
use crate::metrics;
use crate::sync::manager::{SyncMessage, SyncRequestType};
use crate::sync::manager::{BlockProcessType, SyncMessage};
use crate::sync::{BatchProcessResult, ChainId};
use beacon_chain::{
BeaconChainError, BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError,
};
use lighthouse_network::{PeerAction, PeerId};
use slog::{crit, debug, error, info, trace, warn};
use lighthouse_network::PeerAction;
use slog::{debug, error, info, trace, warn};
use tokio::sync::mpsc;
use types::{Epoch, Hash256, SignedBeaconBlock};
/// Id associated to a block processing request, either a batch or a single block.
/// Id associated to a batch processing request, either a sync batch or a parent lookup.
#[derive(Clone, Debug, PartialEq)]
pub enum ProcessId {
pub enum ChainSegmentProcessId {
/// Processing Id of a range syncing batch.
RangeBatchId(ChainId, Epoch),
/// Processing ID for a backfill syncing batch.
BackSyncBatchId(Epoch),
/// Processing Id of the parent lookup of a block.
ParentLookup(PeerId, Hash256),
ParentLookup(Hash256),
}
/// Returned when a chain segment import fails.
@ -32,88 +34,77 @@ struct ChainSegmentFailed {
}
impl<T: BeaconChainTypes> Worker<T> {
/// Attempt to process a block received from a direct RPC request, returning the processing
/// result on the `result_tx` channel.
///
/// Raises a log if there are errors publishing the result to the channel.
/// Attempt to process a block received from a direct RPC request.
pub fn process_rpc_block(
self,
block: SignedBeaconBlock<T::EthSpec>,
result_tx: BlockResultSender<T::EthSpec>,
seen_timestamp: Duration,
process_type: BlockProcessType,
reprocess_tx: mpsc::Sender<ReprocessQueueMessage<T>>,
duplicate_cache: DuplicateCache,
) {
let block_root = block.canonical_root();
// Checks if the block is already being imported through another source
if let Some(handle) = duplicate_cache.check_and_insert(block_root) {
let slot = block.slot();
let block_result = self.chain.process_block(block);
// Check if the block is already being imported through another source
let handle = match duplicate_cache.check_and_insert(block.canonical_root()) {
Some(handle) => handle,
None => {
// Sync handles these results
self.send_sync_message(SyncMessage::BlockProcessed {
process_type,
result: Err(BlockError::BlockIsAlreadyKnown),
});
return;
}
};
let slot = block.slot();
let result = self.chain.process_block(block);
metrics::inc_counter(&metrics::BEACON_PROCESSOR_RPC_BLOCK_IMPORTED_TOTAL);
metrics::inc_counter(&metrics::BEACON_PROCESSOR_RPC_BLOCK_IMPORTED_TOTAL);
if let Ok(root) = &block_result {
info!(
self.log,
"New RPC block received";
"slot" => slot,
"hash" => %root
// RPC block imported, regardless of process type
if let &Ok(hash) = &result {
info!(self.log, "New RPC block received"; "slot" => slot, "hash" => %hash);
// Trigger processing for work referencing this block.
let reprocess_msg = ReprocessQueueMessage::BlockImported(hash);
if reprocess_tx.try_send(reprocess_msg).is_err() {
error!(self.log, "Failed to inform block import"; "source" => "rpc", "block_root" => %hash)
};
if matches!(process_type, BlockProcessType::SingleBlock { .. }) {
self.chain.block_times_cache.write().set_time_observed(
hash,
slot,
seen_timestamp,
None,
None,
);
if reprocess_tx
.try_send(ReprocessQueueMessage::BlockImported(*root))
.is_err()
{
error!(
self.log,
"Failed to inform block import";
"source" => "rpc",
"block_root" => %root,
)
};
}
if result_tx.send(block_result).is_err() {
crit!(self.log, "Failed return sync block result");
}
// Drop the handle to remove the entry from the cache
drop(handle);
} else {
debug!(
self.log,
"Gossip block is being imported";
"block_root" => %block_root,
);
// The gossip block that is being imported should eventually
// trigger reprocessing of queued attestations once it is imported.
// If the gossip block fails import, then it will be downscored
// appropriately in `process_gossip_block`.
// Here, we assume that the block will eventually be imported and
// send a `BlockIsAlreadyKnown` message to sync.
if result_tx
.send(Err(BlockError::BlockIsAlreadyKnown))
.is_err()
{
crit!(self.log, "Failed return sync block result");
self.run_fork_choice()
}
}
// Sync handles these results
self.send_sync_message(SyncMessage::BlockProcessed {
process_type,
result: result.map(|_| ()),
});
// Drop the handle to remove the entry from the cache
drop(handle);
}
/// Attempt to import the chain segment (`blocks`) to the beacon chain, informing the sync
/// thread if more blocks are needed to process it.
pub fn process_chain_segment(
&self,
process_id: ProcessId,
sync_type: ChainSegmentProcessId,
downloaded_blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
) {
match process_id {
let result = match sync_type {
// this a request from the range sync
ProcessId::RangeBatchId(chain_id, epoch) => {
ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => {
let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64());
let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64());
let sent_blocks = downloaded_blocks.len();
let result = match self.process_blocks(downloaded_blocks.iter()) {
match self.process_blocks(downloaded_blocks.iter()) {
(_, Ok(_)) => {
debug!(self.log, "Batch processed";
"batch_epoch" => epoch,
@ -139,19 +130,15 @@ impl<T: BeaconChainTypes> Worker<T> {
peer_action: e.peer_action,
}
}
};
let sync_type = SyncRequestType::RangeSync(epoch, chain_id);
self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
}
}
// this a request from the Backfill sync
ProcessId::BackSyncBatchId(epoch) => {
ChainSegmentProcessId::BackSyncBatchId(epoch) => {
let start_slot = downloaded_blocks.first().map(|b| b.slot().as_u64());
let end_slot = downloaded_blocks.last().map(|b| b.slot().as_u64());
let sent_blocks = downloaded_blocks.len();
let result = match self.process_backfill_blocks(&downloaded_blocks) {
match self.process_backfill_blocks(&downloaded_blocks) {
(_, Ok(_)) => {
debug!(self.log, "Backfill batch processed";
"batch_epoch" => epoch,
@ -173,35 +160,34 @@ impl<T: BeaconChainTypes> Worker<T> {
peer_action: e.peer_action,
}
}
};
let sync_type = SyncRequestType::BackFillSync(epoch);
self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
}
}
// this is a parent lookup request from the sync manager
ProcessId::ParentLookup(peer_id, chain_head) => {
ChainSegmentProcessId::ParentLookup(chain_head) => {
debug!(
self.log, "Processing parent lookup";
"last_peer_id" => %peer_id,
"chain_hash" => %chain_head,
"blocks" => downloaded_blocks.len()
);
// parent blocks are ordered from highest slot to lowest, so we need to process in
// reverse
match self.process_blocks(downloaded_blocks.iter().rev()) {
(_, Err(e)) => {
debug!(self.log, "Parent lookup failed"; "last_peer_id" => %peer_id, "error" => %e.message);
self.send_sync_message(SyncMessage::ParentLookupFailed {
peer_id,
chain_head,
})
(imported_blocks, Err(e)) => {
debug!(self.log, "Parent lookup failed"; "error" => %e.message);
BatchProcessResult::Failed {
imported_blocks: imported_blocks > 0,
peer_action: e.peer_action,
}
}
(_, Ok(_)) => {
(imported_blocks, Ok(_)) => {
debug!(self.log, "Parent lookup processed successfully");
BatchProcessResult::Success(imported_blocks > 0)
}
}
}
}
};
self.send_sync_message(SyncMessage::BatchProcessed { sync_type, result });
}
/// Helper function to process blocks batches which only consumes the chain and blocks to process.

View File

@ -286,6 +286,14 @@ lazy_static! {
"Number of Syncing chains in range, per range type",
&["range_type"]
);
pub static ref SYNC_SINGLE_BLOCK_LOOKUPS: Result<IntGauge> = try_create_int_gauge(
"sync_single_block_lookups",
"Number of single block lookups underway"
);
pub static ref SYNC_PARENT_BLOCK_LOOKUPS: Result<IntGauge> = try_create_int_gauge(
"sync_parent_block_lookups",
"Number of parent block lookups underway"
);
/*
* Block Delay Metrics

View File

@ -8,7 +8,7 @@
//! If a batch fails, the backfill sync cannot progress. In this scenario, we mark the backfill
//! sync as failed, log an error and attempt to retry once a new peer joins the node.
use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent};
use crate::sync::manager::{BatchProcessResult, Id};
use crate::sync::network_context::SyncNetworkContext;
use crate::sync::range_sync::{BatchConfig, BatchId, BatchInfo, BatchState};
@ -534,7 +534,7 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
Ok(v) => v,
};
let process_id = ProcessId::BackSyncBatchId(batch_id);
let process_id = ChainSegmentProcessId::BackSyncBatchId(batch_id);
self.current_processing_batch = Some(batch_id);
if let Err(e) = self

View File

@ -0,0 +1,637 @@
use std::collections::hash_map::Entry;
use std::time::Duration;
use beacon_chain::{BeaconChainTypes, BlockError};
use fnv::FnvHashMap;
use lighthouse_network::{PeerAction, PeerId};
use lru_cache::LRUCache;
use slog::{crit, debug, error, trace, warn, Logger};
use smallvec::SmallVec;
use store::{Hash256, SignedBeaconBlock};
use strum::AsStaticRef;
use tokio::sync::mpsc;
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent};
use crate::metrics;
use self::{
parent_lookup::{ParentLookup, VerifyError},
single_block_lookup::SingleBlockRequest,
};
use super::BatchProcessResult;
use super::{
manager::{BlockProcessType, Id},
network_context::SyncNetworkContext,
};
mod parent_lookup;
mod single_block_lookup;
#[cfg(test)]
mod tests;
const FAILED_CHAINS_CACHE_SIZE: usize = 500;
const SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS: u8 = 3;
pub(crate) struct BlockLookups<T: BeaconChainTypes> {
/// A collection of parent block lookups.
parent_queue: SmallVec<[ParentLookup<T::EthSpec>; 3]>,
/// A cache of failed chain lookups to prevent duplicate searches.
failed_chains: LRUCache<Hash256>,
/// A collection of block hashes being searched for and a flag indicating if a result has been
/// received or not.
///
/// The flag allows us to determine if the peer returned data or sent us nothing.
single_block_lookups: FnvHashMap<Id, SingleBlockRequest<SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS>>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<WorkEvent<T>>,
/// The logger for the import manager.
log: Logger,
}
impl<T: BeaconChainTypes> BlockLookups<T> {
pub fn new(beacon_processor_send: mpsc::Sender<WorkEvent<T>>, log: Logger) -> Self {
Self {
parent_queue: Default::default(),
failed_chains: LRUCache::new(FAILED_CHAINS_CACHE_SIZE),
single_block_lookups: Default::default(),
beacon_processor_send,
log,
}
}
/* Lookup requests */
pub fn search_block(
&mut self,
hash: Hash256,
peer_id: PeerId,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
// Do not re-request a block that is already being requested
if self
.single_block_lookups
.values_mut()
.any(|single_block_request| single_block_request.add_peer(&hash, &peer_id))
{
return;
}
debug!(
self.log,
"Searching for block";
"peer_id" => %peer_id,
"block" => %hash
);
let mut single_block_request = SingleBlockRequest::new(hash, peer_id);
let (peer_id, request) = single_block_request.request_block().unwrap();
if let Ok(request_id) = cx.single_block_lookup_request(peer_id, request) {
self.single_block_lookups
.insert(request_id, single_block_request);
metrics::set_gauge(
&metrics::SYNC_SINGLE_BLOCK_LOOKUPS,
self.single_block_lookups.len() as i64,
);
}
}
pub fn search_parent(
&mut self,
block: Box<SignedBeaconBlock<T::EthSpec>>,
peer_id: PeerId,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
let block_root = block.canonical_root();
let parent_root = block.parent_root();
// If this block or it's parent is part of a known failed chain, ignore it.
if self.failed_chains.contains(&parent_root) || self.failed_chains.contains(&block_root) {
debug!(self.log, "Block is from a past failed chain. Dropping";
"block_root" => ?block_root, "block_slot" => block.slot());
return;
}
// Make sure this block is not already downloaded, and that neither it or its parent is
// being searched for.
if self.parent_queue.iter_mut().any(|parent_req| {
parent_req.contains_block(&block)
|| parent_req.add_peer(&block_root, &peer_id)
|| parent_req.add_peer(&parent_root, &peer_id)
}) {
// we are already searching for this block, ignore it
return;
}
let parent_lookup = ParentLookup::new(*block, peer_id);
self.request_parent(parent_lookup, cx);
}
/* Lookup responses */
pub fn single_block_lookup_response(
&mut self,
id: Id,
peer_id: PeerId,
block: Option<Box<SignedBeaconBlock<T::EthSpec>>>,
seen_timestamp: Duration,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
let mut request = match self.single_block_lookups.entry(id) {
Entry::Occupied(req) => req,
Entry::Vacant(_) => {
if block.is_some() {
crit!(
self.log,
"Block returned for single block lookup not present"
);
#[cfg(debug_assertions)]
panic!("block returned for single block lookup not present");
}
return;
}
};
match request.get_mut().verify_block(block) {
Ok(Some(block)) => {
// This is the correct block, send it for processing
if self
.send_block_for_processing(
block,
seen_timestamp,
BlockProcessType::SingleBlock { id },
)
.is_err()
{
// Remove to avoid inconsistencies
self.single_block_lookups.remove(&id);
}
}
Ok(None) => {
// request finished correctly, it will be removed after the block is processed.
}
Err(error) => {
let msg: &str = error.as_static();
cx.report_peer(peer_id, PeerAction::LowToleranceError, msg);
// Remove the request, if it can be retried it will be added with a new id.
let mut req = request.remove();
debug!(self.log, "Single block lookup failed";
"peer_id" => %peer_id, "error" => msg, "block_root" => %req.hash);
// try the request again if possible
if let Ok((peer_id, request)) = req.request_block() {
if let Ok(id) = cx.single_block_lookup_request(peer_id, request) {
self.single_block_lookups.insert(id, req);
}
}
}
}
metrics::set_gauge(
&metrics::SYNC_SINGLE_BLOCK_LOOKUPS,
self.single_block_lookups.len() as i64,
);
}
pub fn parent_lookup_response(
&mut self,
id: Id,
peer_id: PeerId,
block: Option<Box<SignedBeaconBlock<T::EthSpec>>>,
seen_timestamp: Duration,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
let mut parent_lookup = if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending_response(id))
{
self.parent_queue.remove(pos)
} else {
if block.is_some() {
debug!(self.log, "Response for a parent lookup request that was not found"; "peer_id" => %peer_id);
}
return;
};
match parent_lookup.verify_block(block, &self.failed_chains) {
Ok(Some(block)) => {
// Block is correct, send to the beacon processor.
let chain_hash = parent_lookup.chain_hash();
if self
.send_block_for_processing(
block,
seen_timestamp,
BlockProcessType::ParentLookup { chain_hash },
)
.is_ok()
{
self.parent_queue.push(parent_lookup)
}
}
Ok(None) => {
// Request finished successfully, nothing else to do. It will be removed after the
// processing result arrives.
self.parent_queue.push(parent_lookup);
}
Err(e) => match e {
VerifyError::RootMismatch
| VerifyError::NoBlockReturned
| VerifyError::ExtraBlocksReturned => {
let e = e.as_static();
warn!(self.log, "Peer sent invalid response to parent request.";
"peer_id" => %peer_id, "reason" => e);
// We do not tolerate these kinds of errors. We will accept a few but these are signs
// of a faulty peer.
cx.report_peer(peer_id, PeerAction::LowToleranceError, e);
// We try again if possible.
self.request_parent(parent_lookup, cx);
}
VerifyError::PreviousFailure { parent_root } => {
self.failed_chains.insert(parent_lookup.chain_hash());
debug!(
self.log,
"Parent chain ignored due to past failure";
"block" => %parent_root,
);
// Add the root block to failed chains
self.failed_chains.insert(parent_lookup.chain_hash());
cx.report_peer(
peer_id,
PeerAction::MidToleranceError,
"bbroot_failed_chains",
);
}
},
};
metrics::set_gauge(
&metrics::SYNC_PARENT_BLOCK_LOOKUPS,
self.parent_queue.len() as i64,
);
}
/* Error responses */
#[allow(clippy::needless_collect)] // false positive
pub fn peer_disconnected(&mut self, peer_id: &PeerId, cx: &mut SyncNetworkContext<T::EthSpec>) {
/* Check disconnection for single block lookups */
// better written after https://github.com/rust-lang/rust/issues/59618
let remove_retry_ids: Vec<Id> = self
.single_block_lookups
.iter_mut()
.filter_map(|(id, req)| {
if req.check_peer_disconnected(peer_id).is_err() {
Some(*id)
} else {
None
}
})
.collect();
for mut req in remove_retry_ids
.into_iter()
.map(|id| self.single_block_lookups.remove(&id).unwrap())
.collect::<Vec<_>>()
{
// retry the request
match req.request_block() {
Ok((peer_id, block_request)) => {
if let Ok(request_id) = cx.single_block_lookup_request(peer_id, block_request) {
self.single_block_lookups.insert(request_id, req);
}
}
Err(e) => {
trace!(self.log, "Single block request failed on peer disconnection";
"block_root" => %req.hash, "peer_id" => %peer_id, "reason" => e.as_static());
}
}
}
/* Check disconnection for parent lookups */
while let Some(pos) = self
.parent_queue
.iter_mut()
.position(|req| req.check_peer_disconnected(peer_id).is_err())
{
let parent_lookup = self.parent_queue.remove(pos);
trace!(self.log, "Parent lookup's peer disconnected"; &parent_lookup);
self.request_parent(parent_lookup, cx);
}
}
pub fn parent_lookup_failed(
&mut self,
id: Id,
peer_id: PeerId,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending_response(id))
{
let mut parent_lookup = self.parent_queue.remove(pos);
parent_lookup.download_failed();
trace!(self.log, "Parent lookup request failed"; &parent_lookup);
self.request_parent(parent_lookup, cx);
} else {
return debug!(self.log, "RPC failure for a parent lookup request that was not found"; "peer_id" => %peer_id);
};
metrics::set_gauge(
&metrics::SYNC_PARENT_BLOCK_LOOKUPS,
self.parent_queue.len() as i64,
);
}
pub fn single_block_lookup_failed(&mut self, id: Id, cx: &mut SyncNetworkContext<T::EthSpec>) {
if let Some(mut request) = self.single_block_lookups.remove(&id) {
request.register_failure();
trace!(self.log, "Single block lookup failed"; "block" => %request.hash);
if let Ok((peer_id, block_request)) = request.request_block() {
if let Ok(request_id) = cx.single_block_lookup_request(peer_id, block_request) {
self.single_block_lookups.insert(request_id, request);
}
}
}
metrics::set_gauge(
&metrics::SYNC_SINGLE_BLOCK_LOOKUPS,
self.single_block_lookups.len() as i64,
);
}
/* Processing responses */
pub fn single_block_processed(
&mut self,
id: Id,
result: Result<(), BlockError<T::EthSpec>>,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
let mut req = match self.single_block_lookups.remove(&id) {
Some(req) => req,
None => {
#[cfg(debug_assertions)]
panic!("block processed for single block lookup not present");
#[cfg(not(debug_assertions))]
return crit!(
self.log,
"Block processed for single block lookup not present"
);
}
};
let root = req.hash;
let peer_id = match req.processing_peer() {
Ok(peer) => peer,
Err(_) => return,
};
if let Err(e) = &result {
trace!(self.log, "Single block processing failed"; "block" => %root, "error" => %e);
} else {
trace!(self.log, "Single block processing succeeded"; "block" => %root);
}
match result {
Err(e) => match e {
BlockError::BlockIsAlreadyKnown => {
// No error here
}
BlockError::BeaconChainError(e) => {
// Internal error
error!(self.log, "Beacon chain error processing single block"; "block_root" => %root, "error" => ?e);
}
BlockError::ParentUnknown(block) => {
self.search_parent(block, peer_id, cx);
}
other => {
warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id);
cx.report_peer(
peer_id,
PeerAction::MidToleranceError,
"single_block_failure",
);
// Try it again if possible.
req.register_failure();
if let Ok((peer_id, request)) = req.request_block() {
if let Ok(request_id) = cx.single_block_lookup_request(peer_id, request) {
// insert with the new id
self.single_block_lookups.insert(request_id, req);
}
}
}
},
Ok(()) => {
// No error here
}
}
metrics::set_gauge(
&metrics::SYNC_SINGLE_BLOCK_LOOKUPS,
self.single_block_lookups.len() as i64,
);
}
pub fn parent_block_processed(
&mut self,
chain_hash: Hash256,
result: Result<(), BlockError<T::EthSpec>>,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
let (mut parent_lookup, peer_id) = if let Some((pos, peer)) = self
.parent_queue
.iter()
.enumerate()
.find_map(|(pos, request)| {
request
.get_processing_peer(chain_hash)
.map(|peer| (pos, peer))
}) {
(self.parent_queue.remove(pos), peer)
} else {
#[cfg(debug_assertions)]
panic!(
"Process response for a parent lookup request that was not found. Chain_hash: {}",
chain_hash
);
#[cfg(not(debug_assertions))]
return crit!(self.log, "Process response for a parent lookup request that was not found"; "chain_hash" => %chain_hash);
};
if let Err(e) = &result {
trace!(self.log, "Parent block processing failed"; &parent_lookup, "error" => %e);
} else {
trace!(self.log, "Parent block processing succeeded"; &parent_lookup);
}
match result {
Err(BlockError::ParentUnknown(block)) => {
// need to keep looking for parents
// add the block back to the queue and continue the search
parent_lookup.add_block(*block);
self.request_parent(parent_lookup, cx);
}
Ok(_) | Err(BlockError::BlockIsAlreadyKnown { .. }) => {
let chain_hash = parent_lookup.chain_hash();
let blocks = parent_lookup.chain_blocks();
let process_id = ChainSegmentProcessId::ParentLookup(chain_hash);
match self
.beacon_processor_send
.try_send(WorkEvent::chain_segment(process_id, blocks))
{
Ok(_) => {
self.parent_queue.push(parent_lookup);
}
Err(e) => {
error!(
self.log,
"Failed to send chain segment to processor";
"error" => ?e
);
}
}
}
Err(outcome) => {
// all else we consider the chain a failure and downvote the peer that sent
// us the last block
warn!(
self.log, "Invalid parent chain";
"score_adjustment" => %PeerAction::MidToleranceError,
"outcome" => ?outcome,
"last_peer" => %peer_id,
);
// Add this chain to cache of failed chains
self.failed_chains.insert(chain_hash);
// This currently can be a host of errors. We permit this due to the partial
// ambiguity.
cx.report_peer(peer_id, PeerAction::MidToleranceError, "parent_request_err");
}
}
metrics::set_gauge(
&metrics::SYNC_PARENT_BLOCK_LOOKUPS,
self.parent_queue.len() as i64,
);
}
pub fn parent_chain_processed(
&mut self,
chain_hash: Hash256,
result: BatchProcessResult,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
let parent_lookup = if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.chain_hash() == chain_hash)
{
self.parent_queue.remove(pos)
} else {
#[cfg(debug_assertions)]
panic!(
"Chain process response for a parent lookup request that was not found. Chain_hash: {}",
chain_hash
);
#[cfg(not(debug_assertions))]
return crit!(self.log, "Chain process response for a parent lookup request that was not found"; "chain_hash" => %chain_hash);
};
debug!(self.log, "Parent chain processed"; "chain_hash" => %chain_hash, "result" => ?result);
match result {
BatchProcessResult::Success(_) => {
// nothing to do.
}
BatchProcessResult::Failed {
imported_blocks: _,
peer_action,
} => {
self.failed_chains.insert(parent_lookup.chain_hash());
if let Some(peer_action) = peer_action {
for &peer_id in parent_lookup.used_peers() {
cx.report_peer(peer_id, peer_action, "parent_chain_failure")
}
}
}
}
metrics::set_gauge(
&metrics::SYNC_PARENT_BLOCK_LOOKUPS,
self.parent_queue.len() as i64,
);
}
/* Helper functions */
fn send_block_for_processing(
&mut self,
block: Box<SignedBeaconBlock<T::EthSpec>>,
duration: Duration,
process_type: BlockProcessType,
) -> Result<(), ()> {
trace!(self.log, "Sending block for processing"; "block" => %block.canonical_root(), "process" => ?process_type);
let event = WorkEvent::rpc_beacon_block(block, duration, process_type);
if let Err(e) = self.beacon_processor_send.try_send(event) {
error!(
self.log,
"Failed to send sync block to processor";
"error" => ?e
);
return Err(());
}
Ok(())
}
fn request_parent(
&mut self,
mut parent_lookup: ParentLookup<T::EthSpec>,
cx: &mut SyncNetworkContext<T::EthSpec>,
) {
match parent_lookup.request_parent(cx) {
Err(e) => {
debug!(self.log, "Failed to request parent"; &parent_lookup, "error" => e.as_static());
match e {
parent_lookup::RequestError::SendFailed(_) => {
// Probably shutting down, nothing to do here. Drop the request
}
parent_lookup::RequestError::ChainTooLong
| parent_lookup::RequestError::TooManyAttempts => {
self.failed_chains.insert(parent_lookup.chain_hash());
// This indicates faulty peers.
for &peer_id in parent_lookup.used_peers() {
cx.report_peer(peer_id, PeerAction::LowToleranceError, e.as_static())
}
}
parent_lookup::RequestError::NoPeers => {
// This happens if the peer disconnects while the block is being
// processed. Drop the request without extra penalty
}
}
}
Ok(_) => {
debug!(self.log, "Requesting parent"; &parent_lookup);
self.parent_queue.push(parent_lookup)
}
}
// We remove and add back again requests so we want this updated regardless of outcome.
metrics::set_gauge(
&metrics::SYNC_PARENT_BLOCK_LOOKUPS,
self.parent_queue.len() as i64,
);
}
}

View File

@ -0,0 +1,201 @@
use lighthouse_network::PeerId;
use store::{EthSpec, Hash256, SignedBeaconBlock};
use strum::AsStaticStr;
use crate::sync::{
manager::{Id, SLOT_IMPORT_TOLERANCE},
network_context::SyncNetworkContext,
};
use super::single_block_lookup::{self, SingleBlockRequest};
/// How many attempts we try to find a parent of a block before we give up trying .
pub(crate) const PARENT_FAIL_TOLERANCE: u8 = 5;
/// The maximum depth we will search for a parent block. In principle we should have sync'd any
/// canonical chain to its head once the peer connects. A chain should not appear where it's depth
/// is further back than the most recent head slot.
pub(crate) const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2;
/// Maintains a sequential list of parents to lookup and the lookup's current state.
pub(crate) struct ParentLookup<T: EthSpec> {
/// The root of the block triggering this parent request.
chain_hash: Hash256,
/// The blocks that have currently been downloaded.
downloaded_blocks: Vec<SignedBeaconBlock<T>>,
/// Request of the last parent.
current_parent_request: SingleBlockRequest<PARENT_FAIL_TOLERANCE>,
/// Id of the last parent request.
current_parent_request_id: Option<Id>,
}
#[derive(Debug, PartialEq, Eq, AsStaticStr)]
pub enum VerifyError {
RootMismatch,
NoBlockReturned,
ExtraBlocksReturned,
PreviousFailure { parent_root: Hash256 },
}
#[derive(Debug, PartialEq, Eq)]
pub enum RequestError {
SendFailed(&'static str),
ChainTooLong,
TooManyAttempts,
NoPeers,
}
impl<T: EthSpec> ParentLookup<T> {
pub fn contains_block(&self, block: &SignedBeaconBlock<T>) -> bool {
self.downloaded_blocks
.iter()
.any(|d_block| d_block == block)
}
pub fn new(block: SignedBeaconBlock<T>, peer_id: PeerId) -> Self {
let current_parent_request = SingleBlockRequest::new(block.parent_root(), peer_id);
Self {
chain_hash: block.canonical_root(),
downloaded_blocks: vec![block],
current_parent_request,
current_parent_request_id: None,
}
}
/// Attempts to request the next unknown parent. If the request fails, it should be removed.
pub fn request_parent(&mut self, cx: &mut SyncNetworkContext<T>) -> Result<(), RequestError> {
// check to make sure this request hasn't failed
if self.downloaded_blocks.len() >= PARENT_DEPTH_TOLERANCE {
return Err(RequestError::ChainTooLong);
}
let (peer_id, request) = self.current_parent_request.request_block()?;
match cx.parent_lookup_request(peer_id, request) {
Ok(request_id) => {
self.current_parent_request_id = Some(request_id);
Ok(())
}
Err(reason) => {
self.current_parent_request_id = None;
Err(RequestError::SendFailed(reason))
}
}
}
pub fn check_peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), ()> {
self.current_parent_request.check_peer_disconnected(peer_id)
}
pub fn add_block(&mut self, block: SignedBeaconBlock<T>) {
let next_parent = block.parent_root();
self.downloaded_blocks.push(block);
self.current_parent_request.hash = next_parent;
self.current_parent_request.state = single_block_lookup::State::AwaitingDownload;
self.current_parent_request_id = None;
}
pub fn pending_response(&self, req_id: Id) -> bool {
self.current_parent_request_id == Some(req_id)
}
/// Get the parent lookup's chain hash.
pub fn chain_hash(&self) -> Hash256 {
self.chain_hash
}
pub fn download_failed(&mut self) {
self.current_parent_request.register_failure();
self.current_parent_request_id = None;
}
pub fn chain_blocks(&mut self) -> Vec<SignedBeaconBlock<T>> {
std::mem::take(&mut self.downloaded_blocks)
}
/// Verifies that the received block is what we requested. If so, parent lookup now waits for
/// the processing result of the block.
pub fn verify_block(
&mut self,
block: Option<Box<SignedBeaconBlock<T>>>,
failed_chains: &lru_cache::LRUCache<Hash256>,
) -> Result<Option<Box<SignedBeaconBlock<T>>>, VerifyError> {
let block = self.current_parent_request.verify_block(block)?;
// check if the parent of this block isn't in the failed cache. If it is, this chain should
// be dropped and the peer downscored.
if let Some(parent_root) = block.as_ref().map(|block| block.parent_root()) {
if failed_chains.contains(&parent_root) {
self.current_parent_request.register_failure();
self.current_parent_request_id = None;
return Err(VerifyError::PreviousFailure { parent_root });
}
}
Ok(block)
}
pub fn get_processing_peer(&self, chain_hash: Hash256) -> Option<PeerId> {
if self.chain_hash == chain_hash {
return self.current_parent_request.processing_peer().ok();
}
None
}
#[cfg(test)]
pub fn failed_attempts(&self) -> u8 {
self.current_parent_request.failed_attempts
}
pub fn add_peer(&mut self, block_root: &Hash256, peer_id: &PeerId) -> bool {
self.current_parent_request.add_peer(block_root, peer_id)
}
pub fn used_peers(&self) -> impl Iterator<Item = &PeerId> + '_ {
self.current_parent_request.used_peers.iter()
}
}
impl From<super::single_block_lookup::VerifyError> for VerifyError {
fn from(e: super::single_block_lookup::VerifyError) -> Self {
use super::single_block_lookup::VerifyError as E;
match e {
E::RootMismatch => VerifyError::RootMismatch,
E::NoBlockReturned => VerifyError::NoBlockReturned,
E::ExtraBlocksReturned => VerifyError::ExtraBlocksReturned,
}
}
}
impl From<super::single_block_lookup::LookupRequestError> for RequestError {
fn from(e: super::single_block_lookup::LookupRequestError) -> Self {
use super::single_block_lookup::LookupRequestError as E;
match e {
E::TooManyAttempts => RequestError::TooManyAttempts,
E::NoPeers => RequestError::NoPeers,
}
}
}
impl<T: EthSpec> slog::KV for ParentLookup<T> {
fn serialize(
&self,
record: &slog::Record,
serializer: &mut dyn slog::Serializer,
) -> slog::Result {
serializer.emit_arguments("chain_hash", &format_args!("{}", self.chain_hash))?;
slog::Value::serialize(&self.current_parent_request, record, "parent", serializer)?;
serializer.emit_usize("downloaded_blocks", self.downloaded_blocks.len())?;
slog::Result::Ok(())
}
}
impl RequestError {
pub fn as_static(&self) -> &'static str {
match self {
RequestError::SendFailed(e) => e,
RequestError::ChainTooLong => "chain_too_long",
RequestError::TooManyAttempts => "too_many_attempts",
RequestError::NoPeers => "no_peers",
}
}
}

View File

@ -0,0 +1,209 @@
use std::collections::HashSet;
use lighthouse_network::{rpc::BlocksByRootRequest, PeerId};
use rand::seq::IteratorRandom;
use ssz_types::VariableList;
use store::{EthSpec, Hash256, SignedBeaconBlock};
use strum::AsStaticStr;
/// Object representing a single block lookup request.
#[derive(PartialEq, Eq)]
pub struct SingleBlockRequest<const MAX_ATTEMPTS: u8> {
/// The hash of the requested block.
pub hash: Hash256,
/// State of this request.
pub state: State,
/// Peers that should have this block.
pub available_peers: HashSet<PeerId>,
/// Peers from which we have requested this block.
pub used_peers: HashSet<PeerId>,
/// How many times have we attempted this block.
pub failed_attempts: u8,
}
#[derive(Debug, PartialEq, Eq)]
pub enum State {
AwaitingDownload,
Downloading { peer_id: PeerId },
Processing { peer_id: PeerId },
}
#[derive(Debug, PartialEq, Eq, AsStaticStr)]
pub enum VerifyError {
RootMismatch,
NoBlockReturned,
ExtraBlocksReturned,
}
#[derive(Debug, PartialEq, Eq, AsStaticStr)]
pub enum LookupRequestError {
TooManyAttempts,
NoPeers,
}
impl<const MAX_ATTEMPTS: u8> SingleBlockRequest<MAX_ATTEMPTS> {
pub fn new(hash: Hash256, peer_id: PeerId) -> Self {
Self {
hash,
state: State::AwaitingDownload,
available_peers: HashSet::from([peer_id]),
used_peers: HashSet::default(),
failed_attempts: 0,
}
}
pub fn register_failure(&mut self) {
self.failed_attempts += 1;
self.state = State::AwaitingDownload;
}
pub fn add_peer(&mut self, hash: &Hash256, peer_id: &PeerId) -> bool {
let is_useful = &self.hash == hash;
if is_useful {
self.available_peers.insert(*peer_id);
}
is_useful
}
/// If a peer disconnects, this request could be failed. If so, an error is returned
pub fn check_peer_disconnected(&mut self, dc_peer_id: &PeerId) -> Result<(), ()> {
self.available_peers.remove(dc_peer_id);
if let State::Downloading { peer_id } = &self.state {
if peer_id == dc_peer_id {
// Peer disconnected before providing a block
self.register_failure();
return Err(());
}
}
Ok(())
}
/// Verifies if the received block matches the requested one.
/// Returns the block for processing if the response is what we expected.
pub fn verify_block<T: EthSpec>(
&mut self,
block: Option<Box<SignedBeaconBlock<T>>>,
) -> Result<Option<Box<SignedBeaconBlock<T>>>, VerifyError> {
match self.state {
State::AwaitingDownload => {
self.register_failure();
Err(VerifyError::ExtraBlocksReturned)
}
State::Downloading { peer_id } => match block {
Some(block) => {
if block.canonical_root() != self.hash {
// return an error and drop the block
self.register_failure();
Err(VerifyError::RootMismatch)
} else {
// Return the block for processing.
self.state = State::Processing { peer_id };
Ok(Some(block))
}
}
None => {
self.register_failure();
Err(VerifyError::NoBlockReturned)
}
},
State::Processing { peer_id: _ } => match block {
Some(_) => {
// We sent the block for processing and received an extra block.
self.register_failure();
Err(VerifyError::ExtraBlocksReturned)
}
None => {
// This is simply the stream termination and we are already processing the
// block
Ok(None)
}
},
}
}
pub fn request_block(&mut self) -> Result<(PeerId, BlocksByRootRequest), LookupRequestError> {
debug_assert!(matches!(self.state, State::AwaitingDownload));
if self.failed_attempts <= MAX_ATTEMPTS {
if let Some(&peer_id) = self.available_peers.iter().choose(&mut rand::thread_rng()) {
let request = BlocksByRootRequest {
block_roots: VariableList::from(vec![self.hash]),
};
self.state = State::Downloading { peer_id };
self.used_peers.insert(peer_id);
Ok((peer_id, request))
} else {
Err(LookupRequestError::NoPeers)
}
} else {
Err(LookupRequestError::TooManyAttempts)
}
}
pub fn processing_peer(&self) -> Result<PeerId, ()> {
if let State::Processing { peer_id } = &self.state {
Ok(*peer_id)
} else {
Err(())
}
}
}
impl<const MAX_ATTEMPTS: u8> slog::Value for SingleBlockRequest<MAX_ATTEMPTS> {
fn serialize(
&self,
record: &slog::Record,
key: slog::Key,
serializer: &mut dyn slog::Serializer,
) -> slog::Result {
serializer.emit_str("request", key)?;
serializer.emit_arguments("hash", &format_args!("{}", self.hash))?;
match &self.state {
State::AwaitingDownload => {
"awaiting_download".serialize(record, "state", serializer)?
}
State::Downloading { peer_id } => {
serializer.emit_arguments("downloading_peer", &format_args!("{}", peer_id))?
}
State::Processing { peer_id } => {
serializer.emit_arguments("processing_peer", &format_args!("{}", peer_id))?
}
}
slog::Result::Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use types::test_utils::{SeedableRng, TestRandom, XorShiftRng};
use types::MinimalEthSpec as E;
fn rand_block() -> SignedBeaconBlock<E> {
let mut rng = XorShiftRng::from_seed([42; 16]);
SignedBeaconBlock::from_block(
types::BeaconBlock::Base(types::BeaconBlockBase {
..<_>::random_for_test(&mut rng)
}),
types::Signature::random_for_test(&mut rng),
)
}
#[test]
fn test_happy_path() {
let peer_id = PeerId::random();
let block = rand_block();
let mut sl = SingleBlockRequest::<4>::new(block.canonical_root(), peer_id);
sl.request_block().unwrap();
sl.verify_block(Some(Box::new(block))).unwrap().unwrap();
}
#[test]
fn test_max_attempts() {
let peer_id = PeerId::random();
let block = rand_block();
let mut sl = SingleBlockRequest::<4>::new(block.canonical_root(), peer_id);
sl.register_failure();
}
}

View File

@ -0,0 +1,460 @@
use std::sync::Arc;
use crate::service::RequestId;
use crate::sync::manager::RequestId as SyncId;
use crate::NetworkMessage;
use super::*;
use beacon_chain::builder::Witness;
use beacon_chain::eth1_chain::CachingEth1Backend;
use lighthouse_network::{NetworkGlobals, Request};
use slog::{Drain, Level};
use slot_clock::SystemTimeSlotClock;
use store::MemoryStore;
use types::test_utils::{SeedableRng, TestRandom, XorShiftRng};
use types::MinimalEthSpec as E;
type T = Witness<SystemTimeSlotClock, CachingEth1Backend<E>, E, MemoryStore<E>, MemoryStore<E>>;
struct TestRig {
beacon_processor_rx: mpsc::Receiver<WorkEvent<T>>,
network_rx: mpsc::UnboundedReceiver<NetworkMessage<E>>,
rng: XorShiftRng,
}
const D: Duration = Duration::new(0, 0);
impl TestRig {
fn test_setup(log_level: Option<Level>) -> (BlockLookups<T>, SyncNetworkContext<E>, Self) {
let log = {
let decorator = slog_term::TermDecorator::new().build();
let drain = slog_term::FullFormat::new(decorator).build().fuse();
let drain = slog_async::Async::new(drain).build().fuse();
if let Some(log_level) = log_level {
slog::Logger::root(drain.filter_level(log_level).fuse(), slog::o!())
} else {
slog::Logger::root(drain.filter(|_| false).fuse(), slog::o!())
}
};
let (beacon_processor_tx, beacon_processor_rx) = mpsc::channel(100);
let (network_tx, network_rx) = mpsc::unbounded_channel();
let rng = XorShiftRng::from_seed([42; 16]);
let rig = TestRig {
beacon_processor_rx,
network_rx,
rng,
};
let bl = BlockLookups::new(
beacon_processor_tx,
log.new(slog::o!("component" => "block_lookups")),
);
let cx = {
let globals = Arc::new(NetworkGlobals::new_test_globals(&log));
SyncNetworkContext::new(
network_tx,
globals,
log.new(slog::o!("component" => "network_context")),
)
};
(bl, cx, rig)
}
fn rand_block(&mut self) -> SignedBeaconBlock<E> {
SignedBeaconBlock::from_block(
types::BeaconBlock::Base(types::BeaconBlockBase {
..<_>::random_for_test(&mut self.rng)
}),
types::Signature::random_for_test(&mut self.rng),
)
}
#[track_caller]
fn expect_block_request(&mut self) -> Id {
match self.network_rx.try_recv() {
Ok(NetworkMessage::SendRequest {
peer_id: _,
request: Request::BlocksByRoot(_request),
request_id: RequestId::Sync(SyncId::SingleBlock { id }),
}) => id,
other => {
panic!("Expected block request, found {:?}", other);
}
}
}
#[track_caller]
fn expect_parent_request(&mut self) -> Id {
match self.network_rx.try_recv() {
Ok(NetworkMessage::SendRequest {
peer_id: _,
request: Request::BlocksByRoot(_request),
request_id: RequestId::Sync(SyncId::ParentLookup { id }),
}) => id,
other => panic!("Expected parent request, found {:?}", other),
}
}
#[track_caller]
fn expect_block_process(&mut self) {
match self.beacon_processor_rx.try_recv() {
Ok(work) => {
assert_eq!(work.work_type(), crate::beacon_processor::RPC_BLOCK);
}
other => panic!("Expected block process, found {:?}", other),
}
}
#[track_caller]
fn expect_parent_chain_process(&mut self) {
match self.beacon_processor_rx.try_recv() {
Ok(work) => {
assert_eq!(work.work_type(), crate::beacon_processor::CHAIN_SEGMENT);
}
other => panic!("Expected chain segment process, found {:?}", other),
}
}
#[track_caller]
fn expect_empty_network(&mut self) {
assert_eq!(
self.network_rx.try_recv().expect_err("must err"),
mpsc::error::TryRecvError::Empty
);
}
#[track_caller]
pub fn expect_penalty(&mut self) {
match self.network_rx.try_recv() {
Ok(NetworkMessage::ReportPeer { .. }) => {}
other => panic!("Expected peer penalty, found {:?}", other),
}
}
pub fn block_with_parent(&mut self, parent_root: Hash256) -> SignedBeaconBlock<E> {
SignedBeaconBlock::from_block(
types::BeaconBlock::Base(types::BeaconBlockBase {
parent_root,
..<_>::random_for_test(&mut self.rng)
}),
types::Signature::random_for_test(&mut self.rng),
)
}
}
#[test]
fn test_single_block_lookup_happy_path() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let block = rig.rand_block();
let peer_id = PeerId::random();
// Trigger the request
bl.search_block(block.canonical_root(), peer_id, &mut cx);
let id = rig.expect_block_request();
// The peer provides the correct block, should not be penalized. Now the block should be sent
// for processing.
bl.single_block_lookup_response(id, peer_id, Some(Box::new(block)), D, &mut cx);
rig.expect_empty_network();
rig.expect_block_process();
// The request should still be active.
assert_eq!(bl.single_block_lookups.len(), 1);
// Send the stream termination. Peer should have not been penalized, and the request removed
// after processing.
bl.single_block_lookup_response(id, peer_id, None, D, &mut cx);
bl.single_block_processed(id, Ok(()), &mut cx);
rig.expect_empty_network();
assert_eq!(bl.single_block_lookups.len(), 0);
}
#[test]
fn test_single_block_lookup_empty_response() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let block_hash = Hash256::random();
let peer_id = PeerId::random();
// Trigger the request
bl.search_block(block_hash, peer_id, &mut cx);
let id = rig.expect_block_request();
// The peer does not have the block. It should be penalized.
bl.single_block_lookup_response(id, peer_id, None, D, &mut cx);
rig.expect_penalty();
rig.expect_block_request(); // it should be retried
}
#[test]
fn test_single_block_lookup_wrong_response() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let block_hash = Hash256::random();
let peer_id = PeerId::random();
// Trigger the request
bl.search_block(block_hash, peer_id, &mut cx);
let id = rig.expect_block_request();
// Peer sends something else. It should be penalized.
let bad_block = rig.rand_block();
bl.single_block_lookup_response(id, peer_id, Some(Box::new(bad_block)), D, &mut cx);
rig.expect_penalty();
rig.expect_block_request(); // should be retried
// Send the stream termination. This should not produce an additional penalty.
bl.single_block_lookup_response(id, peer_id, None, D, &mut cx);
rig.expect_empty_network();
}
#[test]
fn test_single_block_lookup_failure() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let block_hash = Hash256::random();
let peer_id = PeerId::random();
// Trigger the request
bl.search_block(block_hash, peer_id, &mut cx);
let id = rig.expect_block_request();
// The request fails. RPC failures are handled elsewhere so we should not penalize the peer.
bl.single_block_lookup_failed(id, &mut cx);
rig.expect_block_request();
rig.expect_empty_network();
}
#[test]
fn test_single_block_lookup_becomes_parent_request() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let block = rig.rand_block();
let peer_id = PeerId::random();
// Trigger the request
bl.search_block(block.canonical_root(), peer_id, &mut cx);
let id = rig.expect_block_request();
// The peer provides the correct block, should not be penalized. Now the block should be sent
// for processing.
bl.single_block_lookup_response(id, peer_id, Some(Box::new(block.clone())), D, &mut cx);
rig.expect_empty_network();
rig.expect_block_process();
// The request should still be active.
assert_eq!(bl.single_block_lookups.len(), 1);
// Send the stream termination. Peer should have not been penalized, and the request moved to a
// parent request after processing.
bl.single_block_processed(id, Err(BlockError::ParentUnknown(Box::new(block))), &mut cx);
assert_eq!(bl.single_block_lookups.len(), 0);
rig.expect_parent_request();
rig.expect_empty_network();
assert_eq!(bl.parent_queue.len(), 1);
}
#[test]
fn test_parent_lookup_happy_path() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let parent = rig.rand_block();
let block = rig.block_with_parent(parent.canonical_root());
let chain_hash = block.canonical_root();
let peer_id = PeerId::random();
// Trigger the request
bl.search_parent(Box::new(block), peer_id, &mut cx);
let id = rig.expect_parent_request();
// Peer sends the right block, it should be sent for processing. Peer should not be penalized.
bl.parent_lookup_response(id, peer_id, Some(Box::new(parent)), D, &mut cx);
rig.expect_block_process();
rig.expect_empty_network();
// Processing succeeds, now the rest of the chain should be sent for processing.
bl.parent_block_processed(chain_hash, Err(BlockError::BlockIsAlreadyKnown), &mut cx);
rig.expect_parent_chain_process();
bl.parent_chain_processed(chain_hash, BatchProcessResult::Success(true), &mut cx);
assert_eq!(bl.parent_queue.len(), 0);
}
#[test]
fn test_parent_lookup_wrong_response() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let parent = rig.rand_block();
let block = rig.block_with_parent(parent.canonical_root());
let chain_hash = block.canonical_root();
let peer_id = PeerId::random();
// Trigger the request
bl.search_parent(Box::new(block), peer_id, &mut cx);
let id1 = rig.expect_parent_request();
// Peer sends the wrong block, peer should be penalized and the block re-requested.
let bad_block = rig.rand_block();
bl.parent_lookup_response(id1, peer_id, Some(Box::new(bad_block)), D, &mut cx);
rig.expect_penalty();
let id2 = rig.expect_parent_request();
// Send the stream termination for the first request. This should not produce extra penalties.
bl.parent_lookup_response(id1, peer_id, None, D, &mut cx);
rig.expect_empty_network();
// Send the right block this time.
bl.parent_lookup_response(id2, peer_id, Some(Box::new(parent)), D, &mut cx);
rig.expect_block_process();
// Processing succeeds, now the rest of the chain should be sent for processing.
bl.parent_block_processed(chain_hash, Ok(()), &mut cx);
rig.expect_parent_chain_process();
bl.parent_chain_processed(chain_hash, BatchProcessResult::Success(true), &mut cx);
assert_eq!(bl.parent_queue.len(), 0);
}
#[test]
fn test_parent_lookup_empty_response() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let parent = rig.rand_block();
let block = rig.block_with_parent(parent.canonical_root());
let chain_hash = block.canonical_root();
let peer_id = PeerId::random();
// Trigger the request
bl.search_parent(Box::new(block), peer_id, &mut cx);
let id1 = rig.expect_parent_request();
// Peer sends an empty response, peer should be penalized and the block re-requested.
bl.parent_lookup_response(id1, peer_id, None, D, &mut cx);
rig.expect_penalty();
let id2 = rig.expect_parent_request();
// Send the right block this time.
bl.parent_lookup_response(id2, peer_id, Some(Box::new(parent)), D, &mut cx);
rig.expect_block_process();
// Processing succeeds, now the rest of the chain should be sent for processing.
bl.parent_block_processed(chain_hash, Ok(()), &mut cx);
rig.expect_parent_chain_process();
bl.parent_chain_processed(chain_hash, BatchProcessResult::Success(true), &mut cx);
assert_eq!(bl.parent_queue.len(), 0);
}
#[test]
fn test_parent_lookup_rpc_failure() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let parent = rig.rand_block();
let block = rig.block_with_parent(parent.canonical_root());
let chain_hash = block.canonical_root();
let peer_id = PeerId::random();
// Trigger the request
bl.search_parent(Box::new(block), peer_id, &mut cx);
let id1 = rig.expect_parent_request();
// The request fails. It should be tried again.
bl.parent_lookup_failed(id1, peer_id, &mut cx);
let id2 = rig.expect_parent_request();
// Send the right block this time.
bl.parent_lookup_response(id2, peer_id, Some(Box::new(parent)), D, &mut cx);
rig.expect_block_process();
// Processing succeeds, now the rest of the chain should be sent for processing.
bl.parent_block_processed(chain_hash, Ok(()), &mut cx);
rig.expect_parent_chain_process();
bl.parent_chain_processed(chain_hash, BatchProcessResult::Success(true), &mut cx);
assert_eq!(bl.parent_queue.len(), 0);
}
#[test]
fn test_parent_lookup_too_many_attempts() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let parent = rig.rand_block();
let block = rig.block_with_parent(parent.canonical_root());
let chain_hash = block.canonical_root();
let peer_id = PeerId::random();
// Trigger the request
bl.search_parent(Box::new(block), peer_id, &mut cx);
for i in 1..=parent_lookup::PARENT_FAIL_TOLERANCE + 1 {
let id = rig.expect_parent_request();
match i % 2 {
// make sure every error is accounted for
0 => {
// The request fails. It should be tried again.
bl.parent_lookup_failed(id, peer_id, &mut cx);
}
_ => {
// Send a bad block this time. It should be tried again.
let bad_block = rig.rand_block();
bl.parent_lookup_response(id, peer_id, Some(Box::new(bad_block)), D, &mut cx);
rig.expect_penalty();
}
}
if i < parent_lookup::PARENT_FAIL_TOLERANCE {
assert_eq!(bl.parent_queue[0].failed_attempts(), dbg!(i));
}
}
assert_eq!(bl.parent_queue.len(), 0);
assert!(bl.failed_chains.contains(&chain_hash));
}
#[test]
fn test_parent_lookup_too_deep() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let mut blocks =
Vec::<SignedBeaconBlock<E>>::with_capacity(parent_lookup::PARENT_DEPTH_TOLERANCE);
while blocks.len() < parent_lookup::PARENT_DEPTH_TOLERANCE {
let parent = blocks
.last()
.map(|b| b.canonical_root())
.unwrap_or_else(Hash256::random);
let block = rig.block_with_parent(parent);
blocks.push(block);
}
let peer_id = PeerId::random();
let trigger_block = blocks.pop().unwrap();
let chain_hash = trigger_block.canonical_root();
bl.search_parent(Box::new(trigger_block), peer_id, &mut cx);
for block in blocks.into_iter().rev() {
let id = rig.expect_parent_request();
// the block
bl.parent_lookup_response(id, peer_id, Some(Box::new(block.clone())), D, &mut cx);
// the stream termination
bl.parent_lookup_response(id, peer_id, None, D, &mut cx);
// the processing request
rig.expect_block_process();
// the processing result
bl.parent_block_processed(
chain_hash,
Err(BlockError::ParentUnknown(Box::new(block))),
&mut cx,
)
}
rig.expect_penalty();
assert!(bl.failed_chains.contains(&chain_hash));
}
#[test]
fn test_parent_lookup_disconnection() {
let (mut bl, mut cx, mut rig) = TestRig::test_setup(None);
let peer_id = PeerId::random();
let trigger_block = rig.rand_block();
bl.search_parent(Box::new(trigger_block), peer_id, &mut cx);
bl.peer_disconnected(&peer_id, &mut cx);
assert!(bl.parent_queue.is_empty());
}

View File

@ -34,29 +34,25 @@
//! search for the block and subsequently search for parents if needed.
use super::backfill_sync::{BackFillSync, ProcessResult, SyncStart};
use super::block_lookups::BlockLookups;
use super::network_context::SyncNetworkContext;
use super::peer_sync_info::{remote_sync_type, PeerSyncType};
use super::range_sync::{ChainId, RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent};
use crate::service::NetworkMessage;
use crate::status::ToStatusMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError};
use fnv::FnvHashMap;
use lighthouse_network::rpc::{methods::MAX_REQUEST_BLOCKS, BlocksByRootRequest, GoodbyeReason};
use lighthouse_network::rpc::methods::MAX_REQUEST_BLOCKS;
use lighthouse_network::types::{NetworkGlobals, SyncState};
use lighthouse_network::SyncInfo;
use lighthouse_network::{PeerAction, PeerId};
use lru_cache::LRUCache;
use slog::{crit, debug, error, info, trace, warn, Logger};
use smallvec::SmallVec;
use ssz_types::VariableList;
use slog::{crit, debug, error, info, trace, Logger};
use std::boxed::Box;
use std::collections::hash_map::Entry;
use std::ops::Sub;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
use types::{EthSpec, Hash256, SignedBeaconBlock, Slot};
/// The number of slots ahead of us that is allowed before requesting a long-range (batch) Sync
/// from a peer. If a peer is within this tolerance (forwards or backwards), it is treated as a
@ -66,12 +62,6 @@ use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
/// gossip if no peers are further than this range ahead of us that we have not already downloaded
/// blocks for.
pub const SLOT_IMPORT_TOLERANCE: usize = 32;
/// How many attempts we try to find a parent of a block before we give up trying .
const PARENT_FAIL_TOLERANCE: usize = 5;
/// The maximum depth we will search for a parent block. In principle we should have sync'd any
/// canonical chain to its head once the peer connects. A chain should not appear where it's depth
/// is further back than the most recent head slot.
const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2;
pub type Id = u32;
@ -120,26 +110,22 @@ pub enum SyncMessage<T: EthSpec> {
/// A batch has been processed by the block processor thread.
BatchProcessed {
sync_type: SyncRequestType,
sync_type: ChainSegmentProcessId,
result: BatchProcessResult,
},
/// A parent lookup has failed.
ParentLookupFailed {
/// The head of the chain of blocks that failed to process.
chain_head: Hash256,
/// The peer that instigated the chain lookup.
peer_id: PeerId,
/// Block processed
BlockProcessed {
process_type: BlockProcessType,
result: Result<(), BlockError<T>>,
},
}
/// The type of sync request made
/// The type of processing specified for a received block.
#[derive(Debug, Clone)]
pub enum SyncRequestType {
/// Request was from the backfill sync algorithm.
BackFillSync(Epoch),
/// The request was from a chain in the range sync algorithm.
RangeSync(Epoch, ChainId),
pub enum BlockProcessType {
SingleBlock { id: Id },
ParentLookup { chain_hash: Hash256 },
}
/// The result of processing multiple blocks (a chain segment).
@ -154,23 +140,6 @@ pub enum BatchProcessResult {
},
}
/// Maintains a sequential list of parents to lookup and the lookup's current state.
struct ParentRequests<T: EthSpec> {
/// The blocks that have currently been downloaded.
downloaded_blocks: Vec<SignedBeaconBlock<T>>,
/// The number of failed attempts to retrieve a parent block. If too many attempts occur, this
/// lookup is failed and rejected.
failed_attempts: usize,
/// The peer who last submitted a block. If the chain ends or fails, this is the peer that is
/// penalized.
last_submitted_peer: PeerId,
/// The request ID of this lookup is in progress.
pending: Option<Id>,
}
/// The primary object for handling and driving all the current syncing logic. It maintains the
/// current state of the syncing process, the number of useful peers, downloaded blocks and
/// controls the logic behind both the long-range (batch) sync and the on-going potential parent
@ -194,45 +163,12 @@ pub struct SyncManager<T: BeaconChainTypes> {
/// Backfill syncing.
backfill_sync: BackFillSync<T>,
/// A collection of parent block lookups.
parent_queue: SmallVec<[ParentRequests<T::EthSpec>; 3]>,
/// A cache of failed chain lookups to prevent duplicate searches.
failed_chains: LRUCache<Hash256>,
/// A collection of block hashes being searched for and a flag indicating if a result has been
/// received or not.
///
/// The flag allows us to determine if the peer returned data or sent us nothing.
single_block_lookups: FnvHashMap<Id, SingleBlockRequest>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T>>,
/// Used for spawning tasks.
executor: task_executor::TaskExecutor,
block_lookups: BlockLookups<T>,
/// The logger for the import manager.
log: Logger,
}
/// Object representing a single block lookup request.
struct SingleBlockRequest {
/// The hash of the requested block.
pub hash: Hash256,
/// Whether a block was received from this request, or the peer returned an empty response.
pub block_returned: bool,
}
impl SingleBlockRequest {
pub fn new(hash: Hash256) -> Self {
Self {
hash,
block_returned: false,
}
}
}
/// Spawns a new `SyncManager` thread which has a weak reference to underlying beacon
/// chain. This allows the chain to be
/// dropped during the syncing process which will gracefully end the `SyncManager`.
@ -253,26 +189,22 @@ pub fn spawn<T: BeaconChainTypes>(
// create an instance of the SyncManager
let mut sync_manager = SyncManager {
chain: beacon_chain.clone(),
network_globals: network_globals.clone(),
input_channel: sync_recv,
network: SyncNetworkContext::new(network_send, network_globals.clone(), log.clone()),
range_sync: RangeSync::new(
beacon_chain.clone(),
beacon_processor_send.clone(),
log.clone(),
),
backfill_sync: BackFillSync::new(
beacon_chain.clone(),
network_globals.clone(),
beacon_chain,
network_globals,
beacon_processor_send.clone(),
log.clone(),
),
network: SyncNetworkContext::new(network_send, network_globals.clone(), log.clone()),
chain: beacon_chain,
network_globals,
input_channel: sync_recv,
parent_queue: SmallVec::new(),
failed_chains: LRUCache::new(500),
single_block_lookups: FnvHashMap::default(),
beacon_processor_send,
executor: executor.clone(),
block_lookups: BlockLookups::new(beacon_processor_send, log.clone()),
log: log.clone(),
};
@ -322,321 +254,17 @@ impl<T: BeaconChainTypes> SyncManager<T> {
self.update_sync_state();
}
/// The response to a `BlocksByRoot` request.
/// The current implementation takes one block at a time. As blocks are streamed, any
/// subsequent blocks will simply be ignored.
/// There are two reasons we could have received a BlocksByRoot response
/// - We requested a single hash and have received a response for the single_block_lookup
/// - We are looking up parent blocks in parent lookup search
async fn parent_lookup_response(
&mut self,
peer_id: PeerId,
request_id: Id,
block: Option<SignedBeaconBlock<T::EthSpec>>,
_seen_timestamp: Duration,
) {
let mut parent_request = if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending == Some(request_id))
{
// we remove from the queue and process it. It will get re-added if required
self.parent_queue.remove(pos)
} else {
if block.is_some() {
debug!(self.log, "Response for a parent lookup request that was not found"; "peer_id" => %peer_id);
}
return;
};
match block {
Some(block) => {
// data was returned, not just a stream termination
// check if the parent of this block isn't in our failed cache. If it is, this
// chain should be dropped and the peer downscored.
if self.failed_chains.contains(&block.message().parent_root()) {
debug!(
self.log,
"Parent chain ignored due to past failure";
"block" => ?block.message().parent_root(),
"slot" => block.slot()
);
if !parent_request.downloaded_blocks.is_empty() {
// Add the root block to failed chains
self.failed_chains
.insert(parent_request.downloaded_blocks[0].canonical_root());
} else {
crit!(self.log, "Parent chain has no blocks");
}
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"bbroot_failed_chains",
);
return;
}
// add the block to response
parent_request.downloaded_blocks.push(block);
// queue for processing
self.process_parent_request(parent_request).await;
}
None => {
// An empty response has been returned to a parent request
// if an empty response is given, the peer didn't have the requested block, try again
parent_request.failed_attempts += 1;
parent_request.last_submitted_peer = peer_id;
self.request_parent(parent_request);
}
}
}
async fn process_block_async(
&mut self,
block: SignedBeaconBlock<T::EthSpec>,
) -> Option<Result<Hash256, BlockError<T::EthSpec>>> {
let (event, rx) = BeaconWorkEvent::rpc_beacon_block(Box::new(block));
match self.beacon_processor_send.try_send(event) {
Ok(_) => {}
Err(e) => {
error!(
self.log,
"Failed to send sync block to processor";
"error" => ?e
);
return None;
}
}
match rx.await {
Ok(block_result) => Some(block_result),
Err(_) => {
warn!(
self.log,
"Sync block not processed";
"msg" => "likely due to system resource exhaustion"
);
None
}
}
}
/// Processes the response obtained from a single block lookup search. If the block is
/// processed or errors, the search ends. If the blocks parent is unknown, a block parent
/// lookup search is started.
async fn single_block_lookup_response(
&mut self,
request_id: Id,
peer_id: PeerId,
block: Option<SignedBeaconBlock<T::EthSpec>>,
seen_timestamp: Duration,
) {
if let Entry::Occupied(mut entry) = self.single_block_lookups.entry(request_id) {
match block {
None => {
// Stream termination. Remove the lookup
let (_, single_block_request) = entry.remove_entry();
// The peer didn't respond with a block that it referenced.
// This can be allowed as some clients may implement pruning. We mildly
// tolerate this behaviour.
if !single_block_request.block_returned {
warn!(self.log, "Peer didn't respond with a block it referenced";
"referenced_block_hash" => %single_block_request.hash, "peer_id" => %peer_id);
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"bbroot_no_block",
);
}
}
Some(block) => {
// update the state of the lookup indicating a block was received from the peer
entry.get_mut().block_returned = true;
// verify the hash is correct and try and process the block
if entry.get().hash != block.canonical_root() {
// The peer that sent this, sent us the wrong block.
// We do not tolerate this behaviour. The peer is instantly disconnected and banned.
warn!(self.log, "Peer sent incorrect block for single block lookup"; "peer_id" => %peer_id);
self.network.goodbye_peer(peer_id, GoodbyeReason::Fault);
return;
}
let block_result = match self.process_block_async(block.clone()).await {
Some(block_result) => block_result,
None => return,
};
// we have the correct block, try and process it
match block_result {
Ok(block_root) => {
// Block has been processed, so write the block time to the cache.
self.chain.block_times_cache.write().set_time_observed(
block_root,
block.slot(),
seen_timestamp,
None,
None,
);
info!(self.log, "Processed block"; "block" => %block_root);
// Spawn `BeaconChain::fork_choice` in a blocking task. It's
// potentially long-running and it might panic if run from an async
// context.
let chain = self.chain.clone();
let log = self.log.clone();
self.executor.spawn_blocking(
move || match chain.fork_choice() {
Ok(()) => trace!(
log,
"Fork choice success";
"location" => "single block"
),
Err(e) => error!(
log,
"Fork choice failed";
"error" => ?e,
"location" => "single block"
),
},
"sync_manager_fork_choice",
);
}
Err(BlockError::ParentUnknown { .. }) => {
// We don't know of the blocks parent, begin a parent lookup search
self.add_unknown_block(peer_id, block);
}
Err(BlockError::BlockIsAlreadyKnown) => {
trace!(self.log, "Single block lookup already known");
}
Err(BlockError::BeaconChainError(e)) => {
warn!(self.log, "Unexpected block processing error"; "error" => ?e);
}
outcome => {
warn!(self.log, "Single block lookup failed"; "outcome" => ?outcome);
// This could be a range of errors. But we couldn't process the block.
// For now we consider this a mid tolerance error.
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"single_block_lookup_failed",
);
}
}
}
}
}
}
/// A block has been sent to us that has an unknown parent. This begins a parent lookup search
/// to find the parent or chain of parents that match our current chain.
fn add_unknown_block(&mut self, peer_id: PeerId, block: SignedBeaconBlock<T::EthSpec>) {
// If we are not synced or within SLOT_IMPORT_TOLERANCE of the block, ignore
if !self.network_globals.sync_state.read().is_synced() {
let head_slot = self
.chain
.head_info()
.map(|info| info.slot)
.unwrap_or_else(|_| Slot::from(0u64));
let unknown_block_slot = block.slot();
// if the block is far in the future, ignore it. If its within the slot tolerance of
// our current head, regardless of the syncing state, fetch it.
if (head_slot >= unknown_block_slot
&& head_slot.sub(unknown_block_slot).as_usize() > SLOT_IMPORT_TOLERANCE)
|| (head_slot < unknown_block_slot
&& unknown_block_slot.sub(head_slot).as_usize() > SLOT_IMPORT_TOLERANCE)
{
return;
}
}
let block_root = block.canonical_root();
// If this block or it's parent is part of a known failed chain, ignore it.
if self.failed_chains.contains(&block.message().parent_root())
|| self.failed_chains.contains(&block_root)
{
debug!(self.log, "Block is from a past failed chain. Dropping"; "block_root" => ?block_root, "block_slot" => block.slot());
return;
}
// Make sure this block is not already being searched for
// NOTE: Potentially store a hashset of blocks for O(1) lookups
for parent_req in self.parent_queue.iter() {
if parent_req
.downloaded_blocks
.iter()
.any(|d_block| d_block == &block)
{
// we are already searching for this block, ignore it
return;
}
}
debug!(self.log, "Unknown block received. Starting a parent lookup"; "block_slot" => block.slot(), "block_hash" => %block.canonical_root());
let parent_request = ParentRequests {
downloaded_blocks: vec![block],
failed_attempts: 0,
last_submitted_peer: peer_id,
pending: None,
};
self.request_parent(parent_request)
}
/// A request to search for a block hash has been received. This function begins a BlocksByRoot
/// request to find the requested block.
fn search_for_block(&mut self, peer_id: PeerId, block_hash: Hash256) {
// If we are not synced, ignore this block
if !self.network_globals.sync_state.read().is_synced() {
return;
}
// Do not re-request a block that is already being requested
if self
.single_block_lookups
.values()
.any(|single_block_request| single_block_request.hash == block_hash)
{
return;
}
debug!(
self.log,
"Searching for block";
"peer_id" => %peer_id,
"block" => %block_hash
);
let request = BlocksByRootRequest {
block_roots: VariableList::from(vec![block_hash]),
};
if let Ok(request_id) = self.network.single_block_lookup_request(peer_id, request) {
self.single_block_lookups
.insert(request_id, SingleBlockRequest::new(block_hash));
}
}
/// Handles RPC errors related to requests that were emitted from the sync manager.
fn inject_error(&mut self, peer_id: PeerId, request_id: RequestId) {
trace!(self.log, "Sync manager received a failed RPC");
match request_id {
RequestId::SingleBlock { id } => {
self.single_block_lookups.remove(&id);
self.block_lookups
.single_block_lookup_failed(id, &mut self.network);
}
RequestId::ParentLookup { id } => {
if let Some(pos) = self
.parent_queue
.iter()
.position(|request| request.pending == Some(id))
{
// increment the failure of a parent lookup if the request matches a parent search
let mut parent_request = self.parent_queue.remove(pos);
parent_request.failed_attempts += 1;
parent_request.last_submitted_peer = peer_id;
self.request_parent(parent_request);
}
self.block_lookups
.parent_lookup_failed(id, peer_id, &mut self.network);
}
RequestId::BackFillSync { id } => {
if let Some(batch_id) = self.network.backfill_sync_response(id, true) {
@ -666,6 +294,8 @@ impl<T: BeaconChainTypes> SyncManager<T> {
fn peer_disconnect(&mut self, peer_id: &PeerId) {
self.range_sync.peer_disconnect(&mut self.network, peer_id);
self.block_lookups
.peer_disconnected(peer_id, &mut self.network);
// Regardless of the outcome, we update the sync status.
let _ = self
.backfill_sync
@ -824,187 +454,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
}
/* Processing State Functions */
// These functions are called in the main poll function to transition the state of the sync
// manager
/// A new block has been received for a parent lookup query, process it.
async fn process_parent_request(&mut self, mut parent_request: ParentRequests<T::EthSpec>) {
// verify the last added block is the parent of the last requested block
if parent_request.downloaded_blocks.len() < 2 {
crit!(
self.log,
"There must be at least two blocks in a parent request lookup at all times"
);
panic!("There must be at least two blocks in parent request lookup at all times");
// fail loudly
}
let previous_index = parent_request.downloaded_blocks.len() - 2;
let expected_hash = parent_request.downloaded_blocks[previous_index].parent_root();
// Note: the length must be greater than 2 so this cannot panic.
let block_hash = parent_request
.downloaded_blocks
.last()
.expect("Complete batch cannot be empty")
.canonical_root();
if block_hash != expected_hash {
// The sent block is not the correct block, remove the head block and downvote
// the peer
let _ = parent_request.downloaded_blocks.pop();
let peer = parent_request.last_submitted_peer;
warn!(self.log, "Peer sent invalid parent.";
"peer_id" => %peer,
"received_block" => %block_hash,
"expected_parent" => %expected_hash,
);
// We try again, but downvote the peer.
self.request_parent(parent_request);
// We do not tolerate these kinds of errors. We will accept a few but these are signs
// of a faulty peer.
self.network.report_peer(
peer,
PeerAction::LowToleranceError,
"parent_request_bad_hash",
);
} else {
// The last block in the queue is the only one that has not attempted to be processed yet.
//
// The logic here attempts to process the last block. If it can be processed, the rest
// of the blocks must have known parents. If any of them cannot be processed, we
// consider the entire chain corrupt and drop it, notifying the user.
//
// If the last block in the queue cannot be processed, we also drop the entire queue.
// If the last block in the queue has an unknown parent, we continue the parent
// lookup-search.
let chain_block_hash = parent_request.downloaded_blocks[0].canonical_root();
let newest_block = parent_request
.downloaded_blocks
.pop()
.expect("There is always at least one block in the queue");
let block_result = match self.process_block_async(newest_block.clone()).await {
Some(block_result) => block_result,
None => return,
};
match block_result {
Err(BlockError::ParentUnknown { .. }) => {
// need to keep looking for parents
// add the block back to the queue and continue the search
parent_request.downloaded_blocks.push(newest_block);
self.request_parent(parent_request);
}
Ok(_) | Err(BlockError::BlockIsAlreadyKnown { .. }) => {
let process_id = ProcessId::ParentLookup(
parent_request.last_submitted_peer,
chain_block_hash,
);
let blocks = parent_request.downloaded_blocks;
match self
.beacon_processor_send
.try_send(BeaconWorkEvent::chain_segment(process_id, blocks))
{
Ok(_) => {}
Err(e) => {
error!(
self.log,
"Failed to send chain segment to processor";
"error" => ?e
);
}
}
}
Err(outcome) => {
// all else we consider the chain a failure and downvote the peer that sent
// us the last block
warn!(
self.log, "Invalid parent chain";
"score_adjustment" => %PeerAction::MidToleranceError,
"outcome" => ?outcome,
"last_peer" => %parent_request.last_submitted_peer,
);
// Add this chain to cache of failed chains
self.failed_chains.insert(chain_block_hash);
// This currently can be a host of errors. We permit this due to the partial
// ambiguity.
self.network.report_peer(
parent_request.last_submitted_peer,
PeerAction::MidToleranceError,
"parent_request_err",
);
}
}
}
}
/// Progresses a parent request query.
///
/// This checks to ensure there a peers to progress the query, checks for failures and
/// initiates requests.
fn request_parent(&mut self, mut parent_request: ParentRequests<T::EthSpec>) {
// check to make sure this request hasn't failed
if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE
|| parent_request.downloaded_blocks.len() >= PARENT_DEPTH_TOLERANCE
{
let error = if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE {
// This is a peer-specific error and the chain could be continued with another
// peer. We don't consider this chain a failure and prevent retries with another
// peer.
"too many failed attempts"
} else if !parent_request.downloaded_blocks.is_empty() {
self.failed_chains
.insert(parent_request.downloaded_blocks[0].canonical_root());
"reached maximum lookup-depth"
} else {
crit!(self.log, "Parent lookup has no blocks");
"no blocks"
};
debug!(self.log, "Parent import failed";
"block" => ?parent_request.downloaded_blocks[0].canonical_root(),
"ancestors_found" => parent_request.downloaded_blocks.len(),
"reason" => error
);
// Downscore the peer.
self.network.report_peer(
parent_request.last_submitted_peer,
PeerAction::LowToleranceError,
"request_parent_import_failed",
);
return; // drop the request
}
let parent_hash = if let Some(block) = parent_request.downloaded_blocks.last() {
block.parent_root()
} else {
crit!(self.log, "Parent queue is empty. This should never happen");
return;
};
let request = BlocksByRootRequest {
block_roots: VariableList::from(vec![parent_hash]),
};
// We continue to search for the chain of blocks from the same peer. Other peers are not
// guaranteed to have this chain of blocks.
let peer_id = parent_request.last_submitted_peer;
if let Ok(request_id) = self.network.parent_lookup_request(peer_id, request) {
// if the request was successful add the queue back into self
parent_request.pending = Some(request_id);
self.parent_queue.push(parent_request);
}
}
/// The main driving future for the sync manager.
async fn main(&mut self) {
// process any inbound messages
@ -1020,19 +469,43 @@ impl<T: BeaconChainTypes> SyncManager<T> {
beacon_block,
seen_timestamp,
} => {
self.rpc_block_received(
request_id,
peer_id,
beacon_block.map(|b| *b),
seen_timestamp,
)
.await;
self.rpc_block_received(request_id, peer_id, beacon_block, seen_timestamp);
}
SyncMessage::UnknownBlock(peer_id, block) => {
self.add_unknown_block(peer_id, *block);
// If we are not synced or within SLOT_IMPORT_TOLERANCE of the block, ignore
if !self.network_globals.sync_state.read().is_synced() {
let head_slot = self
.chain
.head_info()
.map(|info| info.slot)
.unwrap_or_else(|_| Slot::from(0u64));
let unknown_block_slot = block.slot();
// if the block is far in the future, ignore it. If its within the slot tolerance of
// our current head, regardless of the syncing state, fetch it.
if (head_slot >= unknown_block_slot
&& head_slot.sub(unknown_block_slot).as_usize()
> SLOT_IMPORT_TOLERANCE)
|| (head_slot < unknown_block_slot
&& unknown_block_slot.sub(head_slot).as_usize()
> SLOT_IMPORT_TOLERANCE)
{
continue;
}
}
if self.network_globals.peers.read().is_connected(&peer_id) {
self.block_lookups
.search_parent(block, peer_id, &mut self.network);
}
}
SyncMessage::UnknownBlockHash(peer_id, block_hash) => {
self.search_for_block(peer_id, block_hash);
// If we are not synced, ignore this block.
if self.network_globals.sync_state.read().is_synced()
&& self.network_globals.peers.read().is_connected(&peer_id)
{
self.block_lookups
.search_block(block_hash, peer_id, &mut self.network);
}
}
SyncMessage::Disconnect(peer_id) => {
self.peer_disconnect(&peer_id);
@ -1041,8 +514,19 @@ impl<T: BeaconChainTypes> SyncManager<T> {
peer_id,
request_id,
} => self.inject_error(peer_id, request_id),
SyncMessage::BlockProcessed {
process_type,
result,
} => match process_type {
BlockProcessType::SingleBlock { id } => self
.block_lookups
.single_block_processed(id, result, &mut self.network),
BlockProcessType::ParentLookup { chain_hash } => self
.block_lookups
.parent_block_processed(chain_hash, result, &mut self.network),
},
SyncMessage::BatchProcessed { sync_type, result } => match sync_type {
SyncRequestType::RangeSync(epoch, chain_id) => {
ChainSegmentProcessId::RangeBatchId(chain_id, epoch) => {
self.range_sync.handle_block_process_result(
&mut self.network,
chain_id,
@ -1051,7 +535,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
);
self.update_sync_state();
}
SyncRequestType::BackFillSync(epoch) => {
ChainSegmentProcessId::BackSyncBatchId(epoch) => {
match self.backfill_sync.on_batch_process_result(
&mut self.network,
epoch,
@ -1066,41 +550,37 @@ impl<T: BeaconChainTypes> SyncManager<T> {
}
}
}
ChainSegmentProcessId::ParentLookup(chain_hash) => self
.block_lookups
.parent_chain_processed(chain_hash, result, &mut self.network),
},
SyncMessage::ParentLookupFailed {
chain_head,
peer_id,
} => {
// A peer sent an object (block or attestation) that referenced a parent.
// The processing of this chain failed.
self.failed_chains.insert(chain_head);
self.network.report_peer(
peer_id,
PeerAction::MidToleranceError,
"parent_lookup_failed",
);
}
}
}
}
}
async fn rpc_block_received(
fn rpc_block_received(
&mut self,
request_id: RequestId,
peer_id: PeerId,
beacon_block: Option<SignedBeaconBlock<T::EthSpec>>,
beacon_block: Option<Box<SignedBeaconBlock<T::EthSpec>>>,
seen_timestamp: Duration,
) {
match request_id {
RequestId::SingleBlock { id } => {
self.single_block_lookup_response(id, peer_id, beacon_block, seen_timestamp)
.await;
}
RequestId::ParentLookup { id } => {
self.parent_lookup_response(peer_id, id, beacon_block, seen_timestamp)
.await
}
RequestId::SingleBlock { id } => self.block_lookups.single_block_lookup_response(
id,
peer_id,
beacon_block,
seen_timestamp,
&mut self.network,
),
RequestId::ParentLookup { id } => self.block_lookups.parent_lookup_response(
id,
peer_id,
beacon_block,
seen_timestamp,
&mut self.network,
),
RequestId::BackFillSync { id } => {
if let Some(batch_id) = self
.network
@ -1111,7 +591,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
batch_id,
&peer_id,
id,
beacon_block,
beacon_block.map(|b| *b),
) {
Ok(ProcessResult::SyncCompleted) => self.update_sync_state(),
Ok(ProcessResult::Successful) => {}
@ -1133,7 +613,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
chain_id,
batch_id,
id,
beacon_block,
beacon_block.map(|b| *b),
);
self.update_sync_state();
}

View File

@ -2,6 +2,7 @@
//!
//! Stores the various syncing methods for the beacon chain.
mod backfill_sync;
mod block_lookups;
pub mod manager;
mod network_context;
mod peer_sync_info;

View File

@ -1,5 +1,5 @@
use super::batch::{BatchInfo, BatchState};
use crate::beacon_processor::ProcessId;
use crate::beacon_processor::ChainSegmentProcessId;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::sync::{manager::Id, network_context::SyncNetworkContext, BatchProcessResult};
use beacon_chain::BeaconChainTypes;
@ -300,7 +300,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// for removing chains and checking completion is in the callback.
let blocks = batch.start_processing()?;
let process_id = ProcessId::RangeBatchId(self.id, batch_id);
let process_id = ChainSegmentProcessId::RangeBatchId(self.id, batch_id);
self.current_processing_batch = Some(batch_id);
if let Err(e) = self