Improve eth1 block sync (#2008)
## Issue Addressed

NA

## Proposed Changes

- Log about eth1 whilst waiting for genesis.
- For the block and deposit caches, update them after each download instead of when *all* downloads are complete (see the sketch below).
  - This prevents a single timeout error from dropping *all* previously downloaded blocks/deposits.
- Set `max_log_requests_per_update` to avoid timeouts due to very large log counts in a response.
- Set `max_blocks_per_update` to prevent a single update of the block cache from downloading an unreasonable number of blocks.
  - This shouldn't have any effect in normal use; it's just a safeguard against bugs.
- Increase the timeout for eth1 calls from 15s to 60s, as per @pawanjay176's experience with Infura.

## Additional Info

NA
commit 77f3539654 (parent 8fcd22992c)
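The heart of the change is moving the eth1 service from "download everything, then import" to "import each chunk as soon as it arrives". A minimal sketch of that pattern, assuming hypothetical `download_chunk` and `import_into_cache` helpers in place of the service's real ones:

```rust
use std::ops::Range;

// Hypothetical stand-ins for the service's real types and helpers.
struct Log;
#[derive(Debug)]
struct Error;

async fn download_chunk(_range: &Range<u64>) -> Result<Vec<Log>, Error> {
    Ok(Vec::new()) // pretend this performs an eth_getLogs call
}

fn import_into_cache(logs: Vec<Log>) -> usize {
    logs.len() // pretend this writes into the deposit cache
}

/// Import each chunk as soon as it downloads. If a later chunk times out, the
/// `?` returns early, but every chunk imported so far is kept. A
/// collect-then-import approach loses *all* chunks on a single failure.
async fn update_cache(chunks: Vec<Range<u64>>) -> Result<usize, Error> {
    let mut imported = 0;
    for chunk in &chunks {
        let logs = download_chunk(chunk).await?;
        imported += import_into_cache(logs);
    }
    Ok(imported)
}
```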
**`beacon_node/beacon_chain/src/eth1_chain.rs`**

```diff
@@ -71,23 +71,52 @@ fn get_sync_status<T: EthSpec>(
     latest_cached_block: Option<&Eth1Block>,
     head_block: Option<&Eth1Block>,
     genesis_time: u64,
-    current_slot: Slot,
+    current_slot: Option<Slot>,
     spec: &ChainSpec,
 ) -> Option<Eth1SyncStatusData> {
+    let eth1_follow_distance_seconds = spec
+        .seconds_per_eth1_block
+        .saturating_mul(spec.eth1_follow_distance);
+
+    // The voting target timestamp needs to be special-cased when we're before
+    // genesis (as defined by `current_slot == None`).
+    //
+    // For the sake of this status, when prior to genesis we want to invent some voting periods
+    // that are *before* genesis, so that we can indicate to users that we're actually adequately
+    // cached for where they are in time.
+    let voting_target_timestamp = if let Some(current_slot) = current_slot {
         let period = T::SlotsPerEth1VotingPeriod::to_u64();
         // Since `period` is a "constant", we assume it is set sensibly.
         let voting_period_start_slot = (current_slot / period) * period;
-    let voting_target_timestamp = {
 
         let period_start = slot_start_seconds::<T>(
             genesis_time,
             spec.milliseconds_per_slot,
             voting_period_start_slot,
         );
-        let eth1_follow_distance_seconds = spec
-            .seconds_per_eth1_block
-            .saturating_mul(spec.eth1_follow_distance);
 
         period_start.saturating_sub(eth1_follow_distance_seconds)
+    } else {
+        // The number of seconds in an eth1 voting period.
+        let voting_period_duration =
+            T::slots_per_eth1_voting_period() as u64 * (spec.milliseconds_per_slot / 1_000);
+
+        let now = SystemTime::now().duration_since(UNIX_EPOCH).ok()?.as_secs();
+
+        // The number of seconds between now and genesis.
+        let seconds_till_genesis = genesis_time.saturating_sub(now);
+
+        // Determine how many voting periods are contained in distance between
+        // now and genesis, rounding up.
+        let voting_periods_past =
+            (seconds_till_genesis + voting_period_duration - 1) / voting_period_duration;
+
+        // Return the start time of the current voting period*.
+        //
+        // *: This voting period doesn't *actually* exist, we're just using it to
+        // give useful logs prior to genesis.
+        genesis_time
+            .saturating_sub(voting_periods_past * voting_period_duration)
+            .saturating_sub(eth1_follow_distance_seconds)
     };
 
     let latest_cached_block_number = latest_cached_block.map(|b| b.number);
@@ -232,7 +261,7 @@ where
     pub fn sync_status(
         &self,
         genesis_time: u64,
-        current_slot: Slot,
+        current_slot: Option<Slot>,
         spec: &ChainSpec,
     ) -> Option<Eth1SyncStatusData> {
         get_sync_status::<E>(
```
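To make the new pre-genesis branch above concrete, here is the same ceiling-division arithmetic as a standalone sketch; every constant below is an illustrative stand-in, not any network's actual spec value:

```rust
fn main() {
    // Illustrative numbers only, not a real network's spec values.
    let genesis_time: u64 = 1_700_000_000; // hypothetical genesis timestamp
    let now: u64 = 1_699_950_000; // 50_000 s before genesis
    let voting_period_duration: u64 = 2_048 * 12; // slots per period * seconds per slot
    let eth1_follow_distance_seconds: u64 = 2_048 * 14; // follow distance * eth1 block time

    let seconds_till_genesis = genesis_time.saturating_sub(now); // 50_000

    // Round up: how many invented voting periods fit between now and genesis.
    let voting_periods_past =
        (seconds_till_genesis + voting_period_duration - 1) / voting_period_duration; // 3

    // The invented, pre-genesis voting target.
    let voting_target_timestamp = genesis_time
        .saturating_sub(voting_periods_past * voting_period_duration)
        .saturating_sub(eth1_follow_distance_seconds);

    assert_eq!(voting_target_timestamp, 1_699_897_600);
}
```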
**`beacon_node/client/src/notifier.rs`**

```diff
@@ -3,7 +3,7 @@ use beacon_chain::{BeaconChain, BeaconChainTypes};
 use eth2_libp2p::NetworkGlobals;
 use futures::prelude::*;
 use parking_lot::Mutex;
-use slog::{debug, error, info, warn};
+use slog::{debug, error, info, warn, Logger};
 use slot_clock::SlotClock;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
@@ -56,6 +56,7 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
                 "peers" => peer_count_pretty(network.connected_peers()),
                 "wait_time" => estimated_time_pretty(Some(next_slot.as_secs() as f64)),
             );
+            eth1_logging(&beacon_chain, &log);
             sleep(slot_duration).await;
         }
         _ => break,
@@ -172,10 +173,25 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
             );
         }
 
+            eth1_logging(&beacon_chain, &log);
+        }
+        Ok::<(), ()>(())
+    };
+
+    // run the notifier on the current executor
+    executor.spawn(interval_future.unwrap_or_else(|_| ()), "notifier");
+
+    Ok(())
+}
+
+fn eth1_logging<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>, log: &Logger) {
+    let current_slot_opt = beacon_chain.slot().ok();
+
+    if let Ok(head_info) = beacon_chain.head_info() {
         // Perform some logging about the eth1 chain
         if let Some(eth1_chain) = beacon_chain.eth1_chain.as_ref() {
             if let Some(status) =
-                eth1_chain.sync_status(head_info.genesis_time, current_slot, &beacon_chain.spec)
+                eth1_chain.sync_status(head_info.genesis_time, current_slot_opt, &beacon_chain.spec)
             {
                 debug!(
                     log,
@@ -188,12 +204,22 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
                 );
 
                 if !status.lighthouse_is_cached_and_ready {
+                    let voting_target_timestamp = status.voting_target_timestamp;
+
+                    let distance = status
+                        .latest_cached_block_timestamp
+                        .map(|latest| {
+                            voting_target_timestamp.saturating_sub(latest)
+                                / beacon_chain.spec.seconds_per_eth1_block
+                        })
+                        .map(|distance| distance.to_string())
+                        .unwrap_or_else(|| "initializing deposits".to_string());
+
                     warn!(
                         log,
                         "Syncing eth1 block cache";
                         "target_timestamp" => status.voting_target_timestamp,
                         "latest_timestamp" => status.latest_cached_block_timestamp,
-                        "msg" => "block production temporarily impaired"
+                        "msg" => "sync can take longer when using remote eth1 nodes",
+                        "est_blocks_remaining" => distance,
                     );
                 }
             } else {
@@ -203,14 +229,12 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
                 );
             }
         }
+    } else {
+        error!(
+            log,
+            "Unable to get head info";
+        );
+    }
-        Ok::<(), ()>(())
-    };
-
-    // run the notifier on the current executor
-    executor.spawn(interval_future.unwrap_or_else(|_| ()), "notifier");
-
-    Ok(())
 }
 
 /// Returns the peer count, returning something helpful if it's `usize::max_value` (effectively a
```
**`beacon_node/eth1/src/service.rs`**

```diff
@@ -4,12 +4,12 @@ use crate::{
     deposit_cache::Error as DepositCacheError,
     http::{
         get_block, get_block_number, get_chain_id, get_deposit_logs_in_range, get_network_id,
-        BlockQuery, Eth1Id, Log,
+        BlockQuery, Eth1Id,
     },
     inner::{DepositUpdater, Inner},
 };
 use fallback::{Fallback, FallbackError};
-use futures::{future::TryFutureExt, stream, stream::TryStreamExt, StreamExt};
+use futures::{future::TryFutureExt, StreamExt};
 use parking_lot::{RwLock, RwLockReadGuard};
 use serde::{Deserialize, Serialize};
 use slog::{crit, debug, error, info, trace, warn, Logger};
@@ -34,7 +34,7 @@ const BLOCK_NUMBER_TIMEOUT_MILLIS: u64 = STANDARD_TIMEOUT_MILLIS;
 /// Timeout when doing an eth_getBlockByNumber call.
 const GET_BLOCK_TIMEOUT_MILLIS: u64 = STANDARD_TIMEOUT_MILLIS;
 /// Timeout when doing an eth_getLogs to read the deposit contract logs.
-const GET_DEPOSIT_LOG_TIMEOUT_MILLIS: u64 = STANDARD_TIMEOUT_MILLIS;
+const GET_DEPOSIT_LOG_TIMEOUT_MILLIS: u64 = 60_000;
 
 const WARNING_MSG: &str = "BLOCK PROPOSALS WILL FAIL WITHOUT VALID, SYNCED ETH1 CONNECTION";
@@ -384,8 +384,8 @@ impl Default for Config {
             block_cache_truncation: Some(4_096),
             auto_update_interval_millis: 7_000,
             blocks_per_log_query: 1_000,
-            max_log_requests_per_update: None,
-            max_blocks_per_update: None,
+            max_log_requests_per_update: Some(100),
+            max_blocks_per_update: Some(8_192),
         }
     }
 }
```
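For scale: with the unchanged `blocks_per_log_query: 1_000`, the new `max_log_requests_per_update: Some(100)` caps a single deposit-cache update at roughly 100,000 blocks' worth of `eth_getLogs` queries, and `max_blocks_per_update: Some(8_192)` caps one block-cache update at 8,192 block downloads. A sketch of that back-of-the-envelope arithmetic:

```rust
fn main() {
    // Defaults after this commit (`blocks_per_log_query` was already 1_000).
    let blocks_per_log_query: u64 = 1_000;
    let max_log_requests_per_update: u64 = 100;
    let max_blocks_per_update: u64 = 8_192;

    // Upper bound on the block range scanned for deposit logs in one update.
    assert_eq!(blocks_per_log_query * max_log_requests_per_update, 100_000);

    // Upper bound on eth1 blocks downloaded into the block cache in one update.
    assert_eq!(max_blocks_per_update, 8_192);
}
```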
**`beacon_node/eth1/src/service.rs`** (continued)

```diff
@@ -817,38 +817,40 @@ impl Service {
             Vec::new()
         };
 
+        let mut logs_imported: usize = 0;
         let deposit_contract_address_ref: &str = &deposit_contract_address;
-        let logs: Vec<(Range<u64>, Vec<Log>)> =
-            stream::try_unfold(block_number_chunks.into_iter(), |mut chunks| async {
-                match chunks.next() {
-                    Some(chunk) => {
-                        let chunk_ref = &chunk;
-                        endpoints
+        for block_range in block_number_chunks.into_iter() {
+            if block_range.is_empty() {
+                debug!(
+                    self.log,
+                    "No new blocks to scan for logs";
+                );
+                continue;
+            }
+
+            /*
+             * Step 1. Download logs.
+             */
+            let block_range_ref = &block_range;
+            let logs = endpoints
                 .first_success(|e| async move {
                     get_deposit_logs_in_range(
                         e,
-                        deposit_contract_address_ref,
-                        chunk_ref.clone(),
+                        &deposit_contract_address_ref,
+                        block_range_ref.clone(),
                         Duration::from_millis(GET_DEPOSIT_LOG_TIMEOUT_MILLIS),
                     )
                     .await
                     .map_err(SingleEndpointError::GetDepositLogsFailed)
                 })
                 .await
-                            .map(|logs| Some(((chunk, logs), chunks)))
-                    }
-                    None => Ok(None),
-                }
-            })
-            .try_collect()
-            .await
                 .map_err(Error::FallbackError)?;
 
-        let mut logs_imported = 0;
-        for (block_range, log_chunk) in logs.iter() {
+            /*
+             * Step 2. Import logs to cache.
+             */
             let mut cache = self.deposits().write();
-            log_chunk
-                .iter()
+            logs.iter()
                 .map(|raw_log| {
                     raw_log.to_deposit_log(self.inner.spec()).map_err(|error| {
                         Error::FailedToParseDepositLog {
@@ -881,6 +883,12 @@ impl Service {
                 // node to choose an invalid genesis state or propose an invalid block.
                 .collect::<Result<_, _>>()?;
 
+            debug!(
+                self.log,
+                "Imported deposit logs chunk";
+                "logs" => logs.len(),
+            );
+
             cache.last_processed_block = Some(block_range.end.saturating_sub(1));
 
             metrics::set_gauge(&metrics::DEPOSIT_CACHE_LEN, cache.cache.len() as i64);
@@ -976,8 +984,9 @@ impl Service {
         } else {
             Vec::new()
         };
-        // Download the range of blocks and sequentially import them into the cache.
-        // Last processed block in deposit cache
+
+        // This value is used to prevent the block cache from importing a block that is not yet in
+        // the deposit cache.
         let latest_in_cache = self
             .inner
             .deposit_cache
@@ -990,34 +999,26 @@ impl Service {
             .filter(|x| *x <= latest_in_cache)
             .take(max_blocks_per_update)
             .collect::<Vec<_>>();
 
+        debug!(
+            self.log,
+            "Downloading eth1 blocks";
+            "first" => ?required_block_numbers.first(),
+            "last" => ?required_block_numbers.last(),
+        );
+
-        // Produce a stream from the list of required block numbers and return a future that
-        // consumes the it.
-
-        let eth1_blocks: Vec<Eth1Block> = stream::try_unfold(
-            required_block_numbers.into_iter(),
-            |mut block_numbers| async {
-                match block_numbers.next() {
-                    Some(block_number) => {
-                        match endpoints
+        let mut blocks_imported = 0;
+        for block_number in required_block_numbers {
+            let eth1_block = endpoints
                 .first_success(|e| async move {
                     download_eth1_block(e, self.inner.clone(), Some(block_number)).await
                 })
                 .await
-                        {
-                            Ok(eth1_block) => Ok(Some((eth1_block, block_numbers))),
-                            Err(e) => Err(e),
-                        }
-                    }
-                    None => Ok(None),
-                }
-            },
-        )
-        .try_collect()
-        .await
                 .map_err(Error::FallbackError)?;
 
-        let mut blocks_imported = 0;
-        for eth1_block in eth1_blocks {
             self.inner
                 .block_cache
                 .write()
```
**`beacon_node/http_api/src/lib.rs`**

```diff
@@ -2075,9 +2075,7 @@ pub fn serve<T: BeaconChainTypes>(
                 let head_info = chain
                     .head_info()
                     .map_err(warp_utils::reject::beacon_chain_error)?;
-                let current_slot = chain
-                    .slot()
-                    .map_err(warp_utils::reject::beacon_chain_error)?;
+                let current_slot_opt = chain.slot().ok();
 
                 chain
                     .eth1_chain
@@ -2088,7 +2086,7 @@ pub fn serve<T: BeaconChainTypes>(
                     )
                 })
                 .and_then(|eth1| {
-                    eth1.sync_status(head_info.genesis_time, current_slot, &chain.spec)
+                    eth1.sync_status(head_info.genesis_time, current_slot_opt, &chain.spec)
                         .ok_or_else(|| {
                             warp_utils::reject::custom_server_error(
                                 "Unable to determine Eth1 sync status".to_string(),
```
**`book/src/faq.md`**

````diff
@@ -6,6 +6,7 @@
 - [What should I do if I lose my slashing protection database?](#what-should-i-do-if-i-lose-my-slashing-protection-database)
 - [How do I update lighthouse?](#how-do-i-update-lighthouse)
 - [I can't compile lighthouse](#i-cant-compile-lighthouse)
+- [What is "Syncing eth1 block cache"](#what-is-syncing-eth1-block-cache)
 
 
 ### Why does it take so long for a validator to be activated?
@@ -155,3 +156,21 @@ You will just also need to make sure the code you have checked out is up to date
 ### I can't compile lighthouse
 
 See [here.](./installation-source.md#troubleshooting)
+
+### What is "Syncing eth1 block cache"
+
+```
+Nov 30 21:04:28.268 WARN Syncing eth1 block cache   est_blocks_remaining: initializing deposits, msg: sync can take longer when using remote eth1 nodes, service: slot_notifier
+```
+
+This log indicates that your beacon node is downloading blocks and deposits
+from your eth1 node. When the `est_blocks_remaining` is
+`initializing deposits`, your node is downloading deposit logs. It may stay in
+this stage for several minutes. Once the deposit logs are finished
+downloading, the `est_blocks_remaining` value will start decreasing.
+
+It is perfectly normal to see this log when starting a node for the first time
+or after being off for more than several minutes.
+
+If this log continues appearing sporadically during operation, there may be an
+issue with your eth1 endpoint.
````
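For reference, the `est_blocks_remaining` value in that log comes from the notifier change above: the gap between the voting target timestamp and the newest cached eth1 block's timestamp, divided by the spec's `seconds_per_eth1_block` (14 on mainnet-like specs). A sketch with made-up timestamps:

```rust
fn main() {
    // Made-up example values for illustration only.
    let voting_target_timestamp: u64 = 1_606_770_000;
    let latest_cached_block_timestamp: u64 = 1_606_700_000;
    let seconds_per_eth1_block: u64 = 14;

    // Same formula the notifier uses for its warning log.
    let est_blocks_remaining = voting_target_timestamp
        .saturating_sub(latest_cached_block_timestamp)
        / seconds_per_eth1_block;

    assert_eq!(est_blocks_remaining, 5_000);
}
```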