Expand eth1 block cache, add more logs (#1938)

## Issue Addressed

NA

## Proposed Changes

- Caches later blocks than is required by `ETH1_FOLLOW_DISTANCE`.
- Adds logging to `warn` if the eth1 cache is insufficiently primed.
- Use `max_by_key` instead of `max_by` in `BeaconChain::Eth1Chain` since it's simpler.
- Rename `voting_period_start_timestamp` to `voting_target_timestamp` for accuracy.

## Additional Info

The reason for eating into the `ETH1_FOLLOW_DISTANCE` and caching blocks that are closer to the head is due to possibility for `SECONDS_PER_ETH1_BLOCK` to be incorrect (as is the case for the Pyrmont testnet on Goerli).

If `SECONDS_PER_ETH1_BLOCK` is too short, we'll skip back too far from the head and skip over blocks that would be valid [`is_candidate_block`](https://github.com/ethereum/eth2.0-specs/blob/v1.0.0/specs/phase0/validator.md#eth1-data) blocks. This was the case on the Pyrmont testnet and resulted in Lighthouse choosing blocks that were about 30 minutes older than is ideal.
This commit is contained in:
Paul Hauner 2020-11-21 00:26:15 +00:00
parent 3b405f10ea
commit 48f73b21e6
6 changed files with 114 additions and 19 deletions

View File

@ -77,7 +77,7 @@ fn get_sync_status<T: EthSpec>(
let period = T::SlotsPerEth1VotingPeriod::to_u64();
// Since `period` is a "constant", we assume it is set sensibly.
let voting_period_start_slot = (current_slot / period) * period;
let voting_period_start_timestamp = {
let voting_target_timestamp = {
let period_start = slot_start_seconds::<T>(
genesis_time,
spec.milliseconds_per_slot,
@ -125,14 +125,14 @@ fn get_sync_status<T: EthSpec>(
// Lighthouse is "cached and ready" when it has cached enough blocks to cover the start of the
// current voting period.
let lighthouse_is_cached_and_ready =
latest_cached_block_timestamp.map_or(false, |t| t >= voting_period_start_timestamp);
latest_cached_block_timestamp.map_or(false, |t| t >= voting_target_timestamp);
Some(Eth1SyncStatusData {
head_block_number,
head_block_timestamp,
latest_cached_block_number,
latest_cached_block_timestamp,
voting_period_start_timestamp,
voting_target_timestamp,
eth1_node_sync_status_percentage,
lighthouse_is_cached_and_ready,
})
@ -450,13 +450,14 @@ impl<T: EthSpec> Eth1ChainBackend<T> for CachingEth1Backend<T> {
// If no votes exist, choose `state.eth1_data` as default vote.
votes_to_consider
.iter()
.max_by(|(_, x), (_, y)| x.cmp(y))
.max_by_key(|(_, block_number)| *block_number)
.map(|vote| {
let vote = vote.0.clone();
debug!(
self.log,
"No valid eth1_data votes";
"outcome" => "Casting vote corresponding to last candidate eth1 block",
"vote" => ?vote
);
vote
})

View File

@ -171,6 +171,38 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
"current_slot" => current_slot,
);
}
// Perform some logging about the eth1 chain
if let Some(eth1_chain) = beacon_chain.eth1_chain.as_ref() {
if let Some(status) =
eth1_chain.sync_status(head_info.genesis_time, current_slot, &beacon_chain.spec)
{
debug!(
log,
"Eth1 cache sync status";
"eth1_head_block" => status.head_block_number,
"latest_cached_block_number" => status.latest_cached_block_number,
"latest_cached_timestamp" => status.latest_cached_block_timestamp,
"voting_target_timestamp" => status.voting_target_timestamp,
"ready" => status.lighthouse_is_cached_and_ready
);
if !status.lighthouse_is_cached_and_ready {
warn!(
log,
"Syncing eth1 block cache";
"target_timestamp" => status.voting_target_timestamp,
"latest_timestamp" => status.latest_cached_block_timestamp,
"msg" => "block production temporarily impaired"
);
}
} else {
error!(
log,
"Unable to determine eth1 sync status";
);
}
}
}
Ok::<(), ()>(())
};

View File

@ -16,7 +16,7 @@ use std::ops::{Range, RangeInclusive};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::time::{interval_at, Duration, Instant};
use types::ChainSpec;
use types::{ChainSpec, EthSpec, Unsigned};
/// Indicates the default eth1 network id we use for the deposit contract.
pub const DEFAULT_NETWORK_ID: Eth1Id = Eth1Id::Goerli;
@ -34,6 +34,9 @@ const GET_DEPOSIT_LOG_TIMEOUT_MILLIS: u64 = STANDARD_TIMEOUT_MILLIS;
const WARNING_MSG: &str = "BLOCK PROPOSALS WILL FAIL WITHOUT VALID ETH1 CONNECTION";
/// A factor used to reduce the eth1 follow distance to account for discrepancies in the block time.
const ETH1_BLOCK_TIME_TOLERANCE_FACTOR: u64 = 4;
#[derive(Debug, PartialEq)]
pub enum Error {
/// The remote node is less synced that we expect, it is not useful until has done more
@ -41,7 +44,7 @@ pub enum Error {
RemoteNotSynced {
next_required_block: u64,
remote_highest_block: u64,
follow_distance: u64,
reduced_follow_distance: u64,
},
/// Failed to download a block from the eth1 node.
BlockDownloadFailed(String),
@ -113,6 +116,31 @@ pub struct Config {
pub max_blocks_per_update: Option<usize>,
}
impl Config {
/// Sets the block cache to a length that is suitable for the given `EthSpec` and `ChainSpec`.
pub fn set_block_cache_truncation<E: EthSpec>(&mut self, spec: &ChainSpec) {
// Compute the number of eth1 blocks in an eth1 voting period.
let seconds_per_voting_period =
E::SlotsPerEth1VotingPeriod::to_u64() * spec.milliseconds_per_slot / 1000;
let eth1_blocks_per_voting_period = seconds_per_voting_period / spec.seconds_per_eth1_block;
// Compute the number of extra blocks we store prior to the voting period start blocks.
let follow_distance_tolerance_blocks =
spec.eth1_follow_distance / ETH1_BLOCK_TIME_TOLERANCE_FACTOR;
// Ensure we can store two full windows of voting blocks.
let voting_windows = eth1_blocks_per_voting_period * 2;
// Extend the cache to account for varying eth1 block times and the follow distance
// tolerance blocks.
let length = voting_windows
+ (voting_windows / ETH1_BLOCK_TIME_TOLERANCE_FACTOR)
+ follow_distance_tolerance_blocks;
self.block_cache_truncation = Some(length as usize);
}
}
impl Default for Config {
fn default() -> Self {
Self {
@ -161,6 +189,18 @@ impl Service {
}
}
/// Returns the follow distance that has been shortened to accommodate for differences in the
/// spacing between blocks.
///
/// ## Notes
///
/// This is useful since the spec declares `SECONDS_PER_ETH1_BLOCK` to be `14`, whilst it is
/// actually `15` on Goerli.
pub fn reduced_follow_distance(&self) -> u64 {
let full = self.config().follow_distance;
full.saturating_sub(full / ETH1_BLOCK_TIME_TOLERANCE_FACTOR)
}
/// Return byte representation of deposit and block caches.
pub fn as_bytes(&self) -> Vec<u8> {
self.inner.as_bytes()
@ -471,7 +511,7 @@ impl Service {
remote_highest_block_opt: Option<u64>,
) -> Result<DepositCacheUpdateOutcome, Error> {
let endpoint = self.config().endpoint.clone();
let follow_distance = self.config().follow_distance;
let reduced_follow_distance = self.reduced_follow_distance();
let deposit_contract_address = self.config().deposit_contract_address.clone();
let blocks_per_log_query = self.config().blocks_per_log_query;
@ -491,7 +531,7 @@ impl Service {
&endpoint,
remote_highest_block_opt,
next_required_block,
follow_distance,
reduced_follow_distance,
)
.await?;
@ -632,13 +672,13 @@ impl Service {
.unwrap_or_else(|| self.config().lowest_cached_block_number);
let endpoint = self.config().endpoint.clone();
let follow_distance = self.config().follow_distance;
let reduced_follow_distance = self.reduced_follow_distance();
let range = get_new_block_numbers(
&endpoint,
remote_highest_block_opt,
next_required_block,
follow_distance,
reduced_follow_distance,
)
.await?;
// Map the range of required blocks into a Vec.
@ -778,7 +818,7 @@ async fn get_new_block_numbers<'a>(
endpoint: &str,
remote_highest_block_opt: Option<u64>,
next_required_block: u64,
follow_distance: u64,
reduced_follow_distance: u64,
) -> Result<Option<RangeInclusive<u64>>, Error> {
let remote_highest_block = if let Some(block_number) = remote_highest_block_opt {
block_number
@ -787,7 +827,7 @@ async fn get_new_block_numbers<'a>(
.map_err(Error::GetBlockNumberFailed)
.await?
};
let remote_follow_block = remote_highest_block.saturating_sub(follow_distance);
let remote_follow_block = remote_highest_block.saturating_sub(reduced_follow_distance);
if next_required_block <= remote_follow_block {
Ok(Some(next_required_block..=remote_follow_block))
@ -795,12 +835,12 @@ async fn get_new_block_numbers<'a>(
// If this is the case, the node must have gone "backwards" in terms of it's sync
// (i.e., it's head block is lower than it was before).
//
// We assume that the `follow_distance` should be sufficient to ensure this never
// We assume that the `reduced_follow_distance` should be sufficient to ensure this never
// happens, otherwise it is an error.
Err(Error::RemoteNotSynced {
next_required_block,
remote_highest_block,
follow_distance,
reduced_follow_distance,
})
} else {
// Return an empty range.
@ -860,6 +900,7 @@ async fn download_eth1_block(
mod tests {
use super::*;
use toml;
use types::MainnetEthSpec;
#[test]
fn serde_serialize() {
@ -867,4 +908,26 @@ mod tests {
toml::to_string(&Config::default()).expect("Should serde encode default config");
toml::from_str::<Config>(&serialized).expect("Should serde decode default config");
}
#[test]
fn block_cache_size() {
let mut config = Config::default();
let spec = MainnetEthSpec::default_spec();
config.set_block_cache_truncation::<MainnetEthSpec>(&spec);
let len = config.block_cache_truncation.unwrap();
let seconds_per_voting_period =
<MainnetEthSpec as EthSpec>::SlotsPerEth1VotingPeriod::to_u64()
* (spec.milliseconds_per_slot / 1000);
let eth1_blocks_per_voting_period = seconds_per_voting_period / spec.seconds_per_eth1_block;
let reduce_follow_distance_blocks =
config.follow_distance / ETH1_BLOCK_TIME_TOLERANCE_FACTOR;
let minimum_len = eth1_blocks_per_voting_period * 2 + reduce_follow_distance_blocks;
assert!(len > minimum_len as usize);
}
}

View File

@ -272,6 +272,7 @@ pub fn get_config<E: EthSpec>(
client_config.eth1.follow_distance = spec.eth1_follow_distance;
client_config.eth1.network_id = spec.deposit_network_id.into();
client_config.eth1.chain_id = spec.deposit_chain_id.into();
client_config.eth1.set_block_cache_truncation::<E>(spec);
if let Some(mut boot_nodes) = eth2_testnet_config.boot_enr {
client_config.network.boot_nodes_enr.append(&mut boot_nodes)

View File

@ -192,9 +192,7 @@ health of the Eth1 node that the beacon node is connected to.
number and timestamp of the latest block we have in our block cache.
- For correct Eth1 voting this timestamp should be later than the
`voting_period_start_timestamp`.
- `voting_period_start_timestamp`: the start of the period where block
producers must include votes for blocks in the Eth1 chain. Provided for
reference.
- `voting_target_timestamp`: The latest timestamp allowed for an eth1 block in this voting period.
- `eth1_node_sync_status_percentage` (float): An estimate of how far the head of the
Eth1 node is from the head of the Eth1 chain.
- `100.0` indicates a fully synced Eth1 node.
@ -222,7 +220,7 @@ curl -X GET "http://localhost:5052/lighthouse/eth1/syncing" -H "accept: applica
"head_block_timestamp": 1603249317,
"latest_cached_block_number": 3610758,
"latest_cached_block_timestamp": 1603233597,
"voting_period_start_timestamp": 1603228632,
"voting_target_timestamp": 1603228632,
"eth1_node_sync_status_percentage": 100,
"lighthouse_is_cached_and_ready": true
}

View File

@ -153,7 +153,7 @@ pub struct Eth1SyncStatusData {
pub head_block_timestamp: Option<u64>,
pub latest_cached_block_number: Option<u64>,
pub latest_cached_block_timestamp: Option<u64>,
pub voting_period_start_timestamp: u64,
pub voting_target_timestamp: u64,
pub eth1_node_sync_status_percentage: f64,
pub lighthouse_is_cached_and_ready: bool,
}