Add chain_id in range syncing to avoid wrong dispatching of batch results (#1037)

This commit is contained in:
divma 2020-04-22 06:17:56 -05:00 committed by GitHub
parent 2b6b2354e4
commit 2469bde6b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 75 additions and 28 deletions

View File

@ -1,6 +1,6 @@
use crate::router::processor::FUTURE_SLOT_TOLERANCE;
use crate::sync::manager::SyncMessage;
use crate::sync::range_sync::BatchId;
use crate::sync::range_sync::{BatchId, ChainId};
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError, ChainSegmentResult};
use eth2_libp2p::PeerId;
use slog::{crit, debug, error, trace, warn};
@ -12,7 +12,7 @@ use types::SignedBeaconBlock;
#[derive(Clone, Debug, PartialEq)]
pub enum ProcessId {
/// Processing Id of a range syncing batch.
RangeBatchId(BatchId),
RangeBatchId(ChainId, BatchId),
/// Processing Id of the parent lookup of a block
ParentLookup(PeerId),
}
@ -40,7 +40,7 @@ pub fn spawn_block_processor<T: BeaconChainTypes>(
std::thread::spawn(move || {
match process_id {
// this a request from the range sync
ProcessId::RangeBatchId(batch_id) => {
ProcessId::RangeBatchId(chain_id, batch_id) => {
debug!(log, "Processing batch"; "id" => *batch_id, "blocks" => downloaded_blocks.len());
let result = match process_blocks(chain, downloaded_blocks.iter(), &log) {
(_, Ok(_)) => {
@ -59,8 +59,9 @@ pub fn spawn_block_processor<T: BeaconChainTypes>(
};
let msg = SyncMessage::BatchProcessed {
batch_id: batch_id,
downloaded_blocks: downloaded_blocks,
chain_id,
batch_id,
downloaded_blocks,
result,
};
sync_send.try_send(msg).unwrap_or_else(|_| {

View File

@ -35,7 +35,7 @@
use super::block_processor::{spawn_block_processor, BatchProcessResult, ProcessId};
use super::network_context::SyncNetworkContext;
use super::range_sync::{BatchId, RangeSync};
use super::range_sync::{BatchId, ChainId, RangeSync};
use crate::router::processor::PeerSyncInfo;
use crate::service::NetworkMessage;
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessingOutcome};
@ -99,6 +99,7 @@ pub enum SyncMessage<T: EthSpec> {
/// A batch has been processed by the block processor thread.
BatchProcessed {
chain_id: ChainId,
batch_id: BatchId,
downloaded_blocks: Vec<SignedBeaconBlock<T>>,
result: BatchProcessResult,
@ -731,12 +732,14 @@ impl<T: BeaconChainTypes> Future for SyncManager<T> {
self.inject_error(peer_id, request_id);
}
SyncMessage::BatchProcessed {
chain_id,
batch_id,
downloaded_blocks,
result,
} => {
self.range_sync.handle_block_process_result(
&mut self.network,
chain_id,
batch_id,
downloaded_blocks,
result,

View File

@ -38,10 +38,16 @@ pub enum ProcessingResult {
RemoveChain,
}
/// A chain identifier
pub type ChainId = u64;
/// A chain of blocks that need to be downloaded. Peers who claim to contain the target head
/// root are grouped into the peer pool and queried for batches when downloading the
/// chain.
pub struct SyncingChain<T: BeaconChainTypes> {
/// A random id used to identify this chain.
id: ChainId,
/// The original start slot when this chain was initialised.
pub start_slot: Slot,
@ -99,6 +105,7 @@ pub enum ChainSyncingState {
impl<T: BeaconChainTypes> SyncingChain<T> {
pub fn new(
id: u64,
start_slot: Slot,
target_head_slot: Slot,
target_head_root: Hash256,
@ -111,6 +118,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
peer_pool.insert(peer_id);
SyncingChain {
id,
start_slot,
target_head_slot,
target_head_root,
@ -242,11 +250,11 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
/// Sends a batch to the batch processor.
fn process_batch(&mut self, mut batch: Batch<T::EthSpec>) {
let downloaded_blocks = std::mem::replace(&mut batch.downloaded_blocks, Vec::new());
let batch_id = ProcessId::RangeBatchId(batch.id.clone());
let process_id = ProcessId::RangeBatchId(self.id.clone(), batch.id.clone());
self.current_processing_batch = Some(batch);
spawn_block_processor(
Arc::downgrade(&self.chain.clone()),
batch_id,
process_id,
downloaded_blocks,
self.sync_send.clone(),
self.log.clone(),
@ -258,26 +266,36 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
pub fn on_batch_process_result(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
chain_id: ChainId,
batch_id: BatchId,
downloaded_blocks: &mut Option<Vec<SignedBeaconBlock<T::EthSpec>>>,
result: &BatchProcessResult,
) -> Option<ProcessingResult> {
if let Some(current_batch) = &self.current_processing_batch {
if current_batch.id != batch_id {
// batch process does not belong to this chain
if chain_id != self.id {
// the result does not belong to this chain
return None;
}
match &self.current_processing_batch {
Some(current_batch) if current_batch.id != batch_id => {
debug!(self.log, "Unexpected batch result";
"chain_id" => self.id, "batch_id" => *batch_id, "expected_batch_id" => *current_batch.id);
return None;
}
// Continue. This is our processing request
} else {
// not waiting on a processing result
return None;
None => {
debug!(self.log, "Chain was not expecting a batch result";
"chain_id" => self.id, "batch_id" => *batch_id);
return None;
}
_ => {
// chain_id and batch_id match, continue
}
}
// claim the result by consuming the option
let downloaded_blocks = downloaded_blocks.take().or_else(|| {
// if taken by another chain, we are no longer waiting on a result.
self.current_processing_batch = None;
crit!(self.log, "Processed batch taken by another chain");
crit!(self.log, "Processed batch taken by another chain"; "chain_id" => self.id);
None
})?;
@ -289,6 +307,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// double check batches are processed in order TODO: Remove for prod
if batch.id != self.to_be_processed_id {
crit!(self.log, "Batch processed out of order";
"chain_id" => self.id,
"processed_batch_id" => *batch.id,
"expected_id" => *self.to_be_processed_id);
}
@ -330,7 +349,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
}
BatchProcessResult::Partial => {
warn!(self.log, "Batch processing failed but at least one block was imported";
"id" => *batch.id, "peer" => format!("{}", batch.current_peer)
"chain_id" => self.id, "id" => *batch.id, "peer" => format!("{}", batch.current_peer)
);
// At least one block was successfully verified and imported, so we can be sure all
// previous batches are valid and we only need to download the current failed
@ -343,7 +362,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// that it is likely all peers in this chain are sending invalid batches
// repeatedly and are either malicious or faulty. We drop the chain and
// downvote all peers.
warn!(self.log, "Batch failed to download. Dropping chain and downvoting peers"; "id"=> *batch.id);
warn!(self.log, "Batch failed to download. Dropping chain and downvoting peers";
"chain_id" => self.id, "id"=> *batch.id);
for peer_id in self.peer_pool.drain() {
network.downvote_peer(peer_id);
}
@ -355,7 +375,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
}
}
BatchProcessResult::Failed => {
warn!(self.log, "Batch processing failed"; "id" => *batch.id, "peer" => format!("{}", batch.current_peer));
warn!(self.log, "Batch processing failed";
"chain_id" => self.id,"id" => *batch.id, "peer" => format!("{}", batch.current_peer));
// The batch processing failed
// This could be because this batch is invalid, or a previous invalidated batch
// is invalid. We need to find out which and downvote the peer that has sent us
@ -367,7 +388,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// that it is likely all peers in this chain are sending invalid batches
// repeatedly and are either malicious or faulty. We drop the chain and
// downvote all peers.
warn!(self.log, "Batch failed to download. Dropping chain and downvoting peers"; "id"=> *batch.id);
warn!(self.log, "Batch failed to download. Dropping chain and downvoting peers";
"chain_id" => self.id, "id"=> *batch.id);
for peer_id in self.peer_pool.drain() {
network.downvote_peer(peer_id);
}
@ -399,8 +421,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
let processed_batch = self.processed_batches.remove(0);
if *processed_batch.id >= *last_batch.id {
crit!(self.log, "A processed batch had a greater id than the current process id";
"processed_id" => *processed_batch.id,
"current_id" => *last_batch.id);
"chain_id" => self.id,
"processed_id" => *processed_batch.id,
"current_id" => *last_batch.id);
}
if let Some(prev_hash) = processed_batch.original_hash {
@ -415,9 +438,10 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// now.
debug!(
self.log, "Re-processed batch validated. Downvoting original peer";
"batch_id" => *processed_batch.id,
"original_peer" => format!("{}",processed_batch.original_peer),
"new_peer" => format!("{}", processed_batch.current_peer)
"chain_id" => self.id,
"batch_id" => *processed_batch.id,
"original_peer" => format!("{}",processed_batch.original_peer),
"new_peer" => format!("{}", processed_batch.current_peer)
);
network.downvote_peer(processed_batch.original_peer);
}
@ -494,6 +518,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
batch.current_peer = new_peer.clone();
debug!(self.log, "Re-requesting batch";
"chain_id" => self.id,
"start_slot" => batch.start_slot,
"end_slot" => batch.end_slot,
"id" => *batch.id,
@ -527,6 +552,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
if local_finalized_slot > self.current_processed_slot() {
debug!(self.log, "Updating chain's progress";
"chain_id" => self.id,
"prev_completed_slot" => self.current_processed_slot(),
"new_completed_slot" => local_finalized_slot.as_u64());
// Re-index batches
@ -554,7 +580,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
self.peer_pool.insert(peer_id.clone());
// do not request blocks if the chain is not syncing
if let ChainSyncingState::Stopped = self.state {
debug!(self.log, "Peer added to a non-syncing chain"; "peer_id" => format!("{}", peer_id));
debug!(self.log, "Peer added to a non-syncing chain";
"chain_id" => self.id, "peer_id" => format!("{}", peer_id));
return;
}
@ -583,6 +610,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
) -> Option<ProcessingResult> {
if let Some(batch) = self.pending_batches.remove(request_id) {
warn!(self.log, "Batch failed. RPC Error";
"chain_id" => self.id,
"id" => *batch.id,
"retries" => batch.retries,
"peer" => format!("{:?}", peer_id));
@ -623,6 +651,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
batch.current_peer = new_peer.clone();
debug!(self.log, "Re-Requesting batch";
"chain_id" => self.id,
"start_slot" => batch.start_slot,
"end_slot" => batch.end_slot,
"id" => *batch.id,
@ -647,6 +676,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
if let Some(peer_id) = self.get_next_peer() {
if let Some(batch) = self.get_next_batch(peer_id) {
debug!(self.log, "Requesting batch";
"chain_id" => self.id,
"start_slot" => batch.start_slot,
"end_slot" => batch.end_slot,
"id" => *batch.id,

View File

@ -305,7 +305,9 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
peer_id: PeerId,
sync_send: mpsc::UnboundedSender<SyncMessage<T::EthSpec>>,
) {
let chain_id = rand::random();
self.finalized_chains.push(SyncingChain::new(
chain_id,
local_finalized_slot,
target_slot,
target_head,
@ -334,7 +336,9 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
});
self.head_chains.retain(|chain| !chain.peer_pool.is_empty());
let chain_id = rand::random();
let mut new_head_chain = SyncingChain::new(
chain_id,
remote_finalized_slot,
target_slot,
target_head,

View File

@ -8,4 +8,5 @@ mod range;
pub use batch::Batch;
pub use batch::BatchId;
pub use chain::ChainId;
pub use range::RangeSync;

View File

@ -39,7 +39,7 @@
//! Each chain is downloaded in batches of blocks. The batched blocks are processed sequentially
//! and further batches are requested as current blocks are being processed.
use super::chain::ProcessingResult;
use super::chain::{ChainId, ProcessingResult};
use super::chain_collection::{ChainCollection, RangeSyncState};
use super::BatchId;
use crate::router::processor::PeerSyncInfo;
@ -252,6 +252,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
pub fn handle_block_process_result(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
chain_id: ChainId,
batch_id: BatchId,
downloaded_blocks: Vec<SignedBeaconBlock<T::EthSpec>>,
result: BatchProcessResult,
@ -260,7 +261,13 @@ impl<T: BeaconChainTypes> RangeSync<T> {
let mut downloaded_blocks = Some(downloaded_blocks);
match self.chains.finalized_request(|chain| {
chain.on_batch_process_result(network, batch_id, &mut downloaded_blocks, &result)
chain.on_batch_process_result(
network,
chain_id,
batch_id,
&mut downloaded_blocks,
&result,
)
}) {
Some((index, ProcessingResult::RemoveChain)) => {
let chain = self.chains.remove_finalized_chain(index);
@ -291,6 +298,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
match self.chains.head_request(|chain| {
chain.on_batch_process_result(
network,
chain_id,
batch_id,
&mut downloaded_blocks,
&result,