From 99d2c33387477398fc11b55319a064f03ab1a646 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 27 Jan 2022 22:58:32 +0000 Subject: [PATCH] Avoid looking up pre-finalization blocks (#2909) ## Issue Addressed This PR fixes the unnecessary `WARN Single block lookup failed` messages described here: https://github.com/sigp/lighthouse/pull/2866#issuecomment-1008442640 ## Proposed Changes Add a new cache to the `BeaconChain` that tracks the block roots of blocks from before finalization. These could be blocks from the canonical chain (which might need to be read from disk), or old pre-finalization blocks that have been forked out. The cache also stores a set of block roots for in-progress single block lookups, which duplicates some of the information from sync's `single_block_lookups` hashmap: https://github.com/sigp/lighthouse/blob/a836e180f9ad51f31767c5ffb33bebdeff1a9f3f/beacon_node/network/src/sync/manager.rs#L192-L196 On a live node you can confirm that the cache is working by grepping logs for the message: `Rejected attestation to finalized block`. --- .../src/attestation_verification.rs | 18 +++ beacon_node/beacon_chain/src/beacon_chain.rs | 7 ++ .../beacon_chain/src/block_verification.rs | 10 +- beacon_node/beacon_chain/src/builder.rs | 1 + beacon_node/beacon_chain/src/lib.rs | 1 + beacon_node/beacon_chain/src/metrics.rs | 19 +++ .../src/pre_finalization_cache.rs | 119 ++++++++++++++++++ .../tests/attestation_verification.rs | 77 +++++++++++- .../beacon_processor/worker/gossip_methods.rs | 20 +++ 9 files changed, 267 insertions(+), 5 deletions(-) create mode 100644 beacon_node/beacon_chain/src/pre_finalization_cache.rs diff --git a/beacon_node/beacon_chain/src/attestation_verification.rs b/beacon_node/beacon_chain/src/attestation_verification.rs index 6692aa48c..059b3c4d2 100644 --- a/beacon_node/beacon_chain/src/attestation_verification.rs +++ b/beacon_node/beacon_chain/src/attestation_verification.rs @@ -141,6 +141,14 @@ pub enum Error { /// The attestation points to a block we have not yet imported. It's unclear if the attestation /// is valid or not. UnknownHeadBlock { beacon_block_root: Hash256 }, + /// The `attestation.data.beacon_block_root` block is from before the finalized checkpoint. + /// + /// ## Peer scoring + /// + /// The attestation is not descended from the finalized checkpoint, which is a REJECT according + /// to the spec. We downscore lightly because this could also happen if we are processing + /// attestations extremely slowly. + HeadBlockFinalized { beacon_block_root: Hash256 }, /// The `attestation.data.slot` is not from the same epoch as `data.target.epoch`. /// /// ## Peer scoring @@ -990,7 +998,17 @@ fn verify_head_block_is_known( } Ok(block) + } else if chain.is_pre_finalization_block(attestation.data.beacon_block_root)? { + Err(Error::HeadBlockFinalized { + beacon_block_root: attestation.data.beacon_block_root, + }) } else { + // The block is either: + // + // 1) A pre-finalization block that has been pruned. We'll do one network lookup + // for it and when it fails we will penalise all involved peers. + // 2) A post-finalization block that we don't know about yet. We'll queue + // the attestation until the block becomes available (or we time out). Err(Error::UnknownHeadBlock { beacon_block_root: attestation.data.beacon_block_root, }) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 4e1d54dc1..a65a943b9 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -34,6 +34,7 @@ use crate::observed_block_producers::ObservedBlockProducers; use crate::observed_operations::{ObservationOutcome, ObservedOperations}; use crate::persisted_beacon_chain::{PersistedBeaconChain, DUMMY_CANONICAL_HEAD_BLOCK_ROOT}; use crate::persisted_fork_choice::PersistedForkChoice; +use crate::pre_finalization_cache::PreFinalizationBlockCache; use crate::shuffling_cache::{BlockShufflingIds, ShufflingCache}; use crate::snapshot_cache::SnapshotCache; use crate::sync_committee_verification::{ @@ -336,6 +337,8 @@ pub struct BeaconChain { pub early_attester_cache: EarlyAttesterCache, /// A cache used to keep track of various block timings. pub block_times_cache: Arc>, + /// A cache used to track pre-finalization block roots for quick rejection. + pub pre_finalization_block_cache: PreFinalizationBlockCache, /// Sender given to tasks, so that if they encounter a state in which execution cannot /// continue they can request that everything shuts down. pub shutdown_sender: Sender, @@ -2855,6 +2858,10 @@ impl BeaconChain { ); } + // Inform the unknown block cache, in case it was waiting on this block. + self.pre_finalization_block_cache + .block_processed(block_root); + Ok(block_root) } diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index a4a1dc31b..c2dc0028e 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -618,7 +618,7 @@ impl GossipVerifiedBlock { check_block_against_anchor_slot(block.message(), chain)?; // Do not gossip a block from a finalized slot. - check_block_against_finalized_slot(block.message(), chain)?; + check_block_against_finalized_slot(block.message(), block_root, chain)?; // Check if the block is already known. We know it is post-finalization, so it is // sufficient to check the fork choice. @@ -1292,6 +1292,7 @@ fn check_block_against_anchor_slot( /// verifying that condition. fn check_block_against_finalized_slot( block: BeaconBlockRef<'_, T::EthSpec>, + block_root: Hash256, chain: &BeaconChain, ) -> Result<(), BlockError> { let finalized_slot = chain @@ -1301,6 +1302,7 @@ fn check_block_against_finalized_slot( .start_slot(T::EthSpec::slots_per_epoch()); if block.slot() <= finalized_slot { + chain.pre_finalization_block_rejected(block_root); Err(BlockError::WouldRevertFinalizedSlot { block_slot: block.slot(), finalized_slot, @@ -1373,11 +1375,11 @@ pub fn check_block_relevancy( return Err(BlockError::BlockSlotLimitReached); } - // Do not process a block from a finalized slot. - check_block_against_finalized_slot(block, chain)?; - let block_root = block_root.unwrap_or_else(|| get_block_root(signed_block)); + // Do not process a block from a finalized slot. + check_block_against_finalized_slot(block, block_root, chain)?; + // Check if the block is already known. We know it is post-finalization, so it is // sufficient to check the fork choice. if chain.fork_choice.read().contains_block(&block_root) { diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 24a9a916b..e9860124c 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -752,6 +752,7 @@ where shuffling_cache: TimeoutRwLock::new(ShufflingCache::new()), beacon_proposer_cache: <_>::default(), block_times_cache: <_>::default(), + pre_finalization_block_cache: <_>::default(), validator_pubkey_cache: TimeoutRwLock::new(validator_pubkey_cache), attester_cache: <_>::default(), early_attester_cache: <_>::default(), diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index aff8657e8..d41c1a5cc 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -27,6 +27,7 @@ mod observed_block_producers; pub mod observed_operations; mod persisted_beacon_chain; mod persisted_fork_choice; +mod pre_finalization_cache; pub mod schema_change; mod shuffling_cache; mod snapshot_cache; diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 28eacad55..41b760453 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -904,6 +904,20 @@ lazy_static! { "beacon_backfill_signature_total_seconds", "Time spent verifying the signature set during backfill sync, including setup" ); + + /* + * Pre-finalization block cache. + */ + pub static ref PRE_FINALIZATION_BLOCK_CACHE_SIZE: Result = + try_create_int_gauge( + "beacon_pre_finalization_block_cache_size", + "Number of pre-finalization block roots cached for quick rejection" + ); + pub static ref PRE_FINALIZATION_BLOCK_LOOKUP_COUNT: Result = + try_create_int_gauge( + "beacon_pre_finalization_block_lookup_count", + "Number of block roots subject to single block lookups" + ); } /// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot, @@ -931,6 +945,11 @@ pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { ) } + if let Some((size, num_lookups)) = beacon_chain.pre_finalization_block_cache.metrics() { + set_gauge_by_usize(&PRE_FINALIZATION_BLOCK_CACHE_SIZE, size); + set_gauge_by_usize(&PRE_FINALIZATION_BLOCK_LOOKUP_COUNT, num_lookups); + } + set_gauge_by_usize( &OP_POOL_NUM_ATTESTATIONS, attestation_stats.num_attestations, diff --git a/beacon_node/beacon_chain/src/pre_finalization_cache.rs b/beacon_node/beacon_chain/src/pre_finalization_cache.rs new file mode 100644 index 000000000..41771b048 --- /dev/null +++ b/beacon_node/beacon_chain/src/pre_finalization_cache.rs @@ -0,0 +1,119 @@ +use crate::{BeaconChain, BeaconChainError, BeaconChainTypes}; +use itertools::process_results; +use lru::LruCache; +use parking_lot::Mutex; +use slog::debug; +use std::time::Duration; +use types::Hash256; + +const BLOCK_ROOT_CACHE_LIMIT: usize = 512; +const LOOKUP_LIMIT: usize = 8; +const METRICS_TIMEOUT: Duration = Duration::from_millis(100); + +/// Cache for rejecting attestations to blocks from before finalization. +/// +/// It stores a collection of block roots that are pre-finalization and therefore not known to fork +/// choice in `verify_head_block_is_known` during attestation processing. +#[derive(Default)] +pub struct PreFinalizationBlockCache { + cache: Mutex, +} + +struct Cache { + /// Set of block roots that are known to be pre-finalization. + block_roots: LruCache, + /// Set of block roots that are the subject of single block lookups. + in_progress_lookups: LruCache, +} + +impl Default for Cache { + fn default() -> Self { + Cache { + block_roots: LruCache::new(BLOCK_ROOT_CACHE_LIMIT), + in_progress_lookups: LruCache::new(LOOKUP_LIMIT), + } + } +} + +impl BeaconChain { + /// Check whether the block with `block_root` is known to be pre-finalization. + /// + /// The provided `block_root` is assumed to be unknown to fork choice. I.e., it + /// is not known to be a descendant of the finalized block. + /// + /// Return `true` if the attestation to this block should be rejected outright, + /// return `false` if more information is needed from a single-block-lookup. + pub fn is_pre_finalization_block(&self, block_root: Hash256) -> Result { + let mut cache = self.pre_finalization_block_cache.cache.lock(); + + // Check the cache to see if we already know this pre-finalization block root. + if cache.block_roots.contains(&block_root) { + return Ok(true); + } + + // Avoid repeating the disk lookup for blocks that are already subject to a network lookup. + // Sync will take care of de-duplicating the single block lookups. + if cache.in_progress_lookups.contains(&block_root) { + return Ok(false); + } + + // 1. Check memory for a recent pre-finalization block. + let is_recent_finalized_block = self.with_head(|head| { + process_results( + head.beacon_state.rev_iter_block_roots(&self.spec), + |mut iter| iter.any(|(_, root)| root == block_root), + ) + .map_err(BeaconChainError::BeaconStateError) + })?; + if is_recent_finalized_block { + cache.block_roots.put(block_root, ()); + return Ok(true); + } + + // 2. Check on disk. + if self.store.get_block(&block_root)?.is_some() { + cache.block_roots.put(block_root, ()); + return Ok(true); + } + + // 3. Check the network with a single block lookup. + cache.in_progress_lookups.put(block_root, ()); + if cache.in_progress_lookups.len() == LOOKUP_LIMIT { + // NOTE: we expect this to occur sometimes if a lot of blocks that we look up fail to be + // imported for reasons other than being pre-finalization. The cache will eventually + // self-repair in this case by replacing old entries with new ones until all the failed + // blocks have been flushed out. Solving this issue isn't as simple as hooking the + // beacon processor's functions that handle failed blocks because we need the block root + // and it has been erased from the `BlockError` by that point. + debug!( + self.log, + "Pre-finalization lookup cache is full"; + ); + } + Ok(false) + } + + pub fn pre_finalization_block_rejected(&self, block_root: Hash256) { + // Future requests can know that this block is invalid without having to look it up again. + let mut cache = self.pre_finalization_block_cache.cache.lock(); + cache.in_progress_lookups.pop(&block_root); + cache.block_roots.put(block_root, ()); + } +} + +impl PreFinalizationBlockCache { + pub fn block_processed(&self, block_root: Hash256) { + // Future requests will find this block in fork choice, so no need to cache it in the + // ongoing lookup cache any longer. + self.cache.lock().in_progress_lookups.pop(&block_root); + } + + pub fn contains(&self, block_root: Hash256) -> bool { + self.cache.lock().block_roots.contains(&block_root) + } + + pub fn metrics(&self) -> Option<(usize, usize)> { + let cache = self.cache.try_lock_for(METRICS_TIMEOUT)?; + Some((cache.block_roots.len(), cache.in_progress_lookups.len())) + } +} diff --git a/beacon_node/beacon_chain/tests/attestation_verification.rs b/beacon_node/beacon_chain/tests/attestation_verification.rs index f5942a2be..3c675ec6a 100644 --- a/beacon_node/beacon_chain/tests/attestation_verification.rs +++ b/beacon_node/beacon_chain/tests/attestation_verification.rs @@ -5,7 +5,7 @@ use beacon_chain::{ test_utils::{ test_spec, AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType, }, - BeaconChain, BeaconChainTypes, WhenSlotSkipped, + BeaconChain, BeaconChainError, BeaconChainTypes, WhenSlotSkipped, }; use int_to_bytes::int_to_bytes32; use lazy_static::lazy_static; @@ -991,6 +991,81 @@ fn attestation_that_skips_epochs() { .expect("should gossip verify attestation that skips slots"); } +#[test] +fn attestation_to_finalized_block() { + let harness = get_harness(VALIDATOR_COUNT); + + // Extend the chain out a few epochs so we have some chain depth to play with. + harness.extend_chain( + MainnetEthSpec::slots_per_epoch() as usize * 4 + 1, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ); + + let finalized_checkpoint = harness + .chain + .with_head(|head| Ok::<_, BeaconChainError>(head.beacon_state.finalized_checkpoint())) + .unwrap(); + assert!(finalized_checkpoint.epoch > 0); + + let current_slot = harness.get_current_slot(); + + let earlier_slot = finalized_checkpoint + .epoch + .start_slot(MainnetEthSpec::slots_per_epoch()) + - 1; + let earlier_block = harness + .chain + .block_at_slot(earlier_slot, WhenSlotSkipped::Prev) + .expect("should not error getting block at slot") + .expect("should find block at slot"); + let earlier_block_root = earlier_block.canonical_root(); + assert_ne!(earlier_block_root, finalized_checkpoint.root); + + let mut state = harness + .chain + .get_state(&earlier_block.state_root(), Some(earlier_slot)) + .expect("should not error getting state") + .expect("should find state"); + + while state.slot() < current_slot { + per_slot_processing(&mut state, None, &harness.spec).expect("should process slot"); + } + + let state_root = state.update_tree_hash_cache().unwrap(); + + let (attestation, subnet_id) = harness + .get_unaggregated_attestations( + &AttestationStrategy::AllValidators, + &state, + state_root, + earlier_block_root, + current_slot, + ) + .first() + .expect("should have at least one committee") + .first() + .cloned() + .expect("should have at least one attestation in committee"); + assert_eq!(attestation.data.beacon_block_root, earlier_block_root); + + // Attestation should be rejected for attesting to a pre-finalization block. + let res = harness + .chain + .verify_unaggregated_attestation_for_gossip(&attestation, Some(subnet_id)); + assert!( + matches!(res, Err(AttnError:: HeadBlockFinalized { beacon_block_root }) + if beacon_block_root == earlier_block_root + ) + ); + + // Pre-finalization block cache should contain the block root. + assert!(harness + .chain + .pre_finalization_block_cache + .contains(earlier_block_root)); +} + #[test] fn verify_aggregate_for_gossip_doppelganger_detection() { let harness = get_harness(VALIDATOR_COUNT); diff --git a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs index 9ece18d02..132bed1b7 100644 --- a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs +++ b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs @@ -1701,6 +1701,26 @@ impl Worker { "attn_too_many_skipped_slots", ); } + AttnError::HeadBlockFinalized { beacon_block_root } => { + debug!( + self.log, + "Rejected attestation to finalized block"; + "block_root" => ?beacon_block_root, + "attestation_slot" => failed_att.attestation().data.slot, + ); + + // We have to reject the message as it isn't a descendant of the finalized + // checkpoint. + self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); + + // The peer that sent us this could be a lagger, or a spammer, or this failure could + // be due to us processing attestations extremely slowly. Don't be too harsh. + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "attn_to_finalized_block", + ); + } AttnError::BeaconChainError(BeaconChainError::DBError(Error::HotColdDBError( HotColdDBError::AttestationStateIsFinalized { .. }, ))) => {