diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index bf4180547..fcd7be791 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -30,7 +30,7 @@ use crate::eth1_finalization_cache::{Eth1FinalizationCache, Eth1FinalizationData use crate::events::ServerSentEventHandler; use crate::execution_payload::{get_execution_payload, NotifyExecutionLayer, PreparePayloadHandle}; use crate::fork_choice_signal::{ForkChoiceSignalRx, ForkChoiceSignalTx, ForkChoiceWaitResult}; -use crate::head_tracker::HeadTracker; +use crate::head_tracker::{HeadTracker, HeadTrackerReader, SszHeadTracker}; use crate::historical_blocks::HistoricalBlockError; use crate::light_client_finality_update_verification::{ Error as LightClientFinalityUpdateError, VerifiedLightClientFinalityUpdate, @@ -610,12 +610,19 @@ impl BeaconChain { let mut batch = vec![]; let _head_timer = metrics::start_timer(&metrics::PERSIST_HEAD); - batch.push(self.persist_head_in_batch()); + + // Hold a lock to head_tracker until it has been persisted to disk. Otherwise there's a race + // condition with the pruning thread which can result in a block present in the head tracker + // but absent in the DB. This inconsistency halts pruning and dramastically increases disk + // size. Ref: https://github.com/sigp/lighthouse/issues/4773 + let head_tracker = self.head_tracker.0.read(); + batch.push(self.persist_head_in_batch(&head_tracker)); let _fork_choice_timer = metrics::start_timer(&metrics::PERSIST_FORK_CHOICE); batch.push(self.persist_fork_choice_in_batch()); self.store.hot_db.do_atomically(batch)?; + drop(head_tracker); Ok(()) } @@ -623,25 +630,28 @@ impl BeaconChain { /// Return a `PersistedBeaconChain` without reference to a `BeaconChain`. pub fn make_persisted_head( genesis_block_root: Hash256, - head_tracker: &HeadTracker, + head_tracker_reader: &HeadTrackerReader, ) -> PersistedBeaconChain { PersistedBeaconChain { _canonical_head_block_root: DUMMY_CANONICAL_HEAD_BLOCK_ROOT, genesis_block_root, - ssz_head_tracker: head_tracker.to_ssz_container(), + ssz_head_tracker: SszHeadTracker::from_map(head_tracker_reader), } } /// Return a database operation for writing the beacon chain head to disk. - pub fn persist_head_in_batch(&self) -> KeyValueStoreOp { - Self::persist_head_in_batch_standalone(self.genesis_block_root, &self.head_tracker) + pub fn persist_head_in_batch( + &self, + head_tracker_reader: &HeadTrackerReader, + ) -> KeyValueStoreOp { + Self::persist_head_in_batch_standalone(self.genesis_block_root, head_tracker_reader) } pub fn persist_head_in_batch_standalone( genesis_block_root: Hash256, - head_tracker: &HeadTracker, + head_tracker_reader: &HeadTrackerReader, ) -> KeyValueStoreOp { - Self::make_persisted_head(genesis_block_root, head_tracker) + Self::make_persisted_head(genesis_block_root, head_tracker_reader) .as_kv_store_op(BEACON_CHAIN_DB_KEY) } @@ -1341,6 +1351,7 @@ impl BeaconChain { self.head_tracker.heads() } + /// Only used in tests. pub fn knows_head(&self, block_hash: &SignedBeaconBlockHash) -> bool { self.head_tracker.contains_head((*block_hash).into()) } diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index d36d996f0..330036d43 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -805,10 +805,11 @@ where // // This *must* be stored before constructing the `BeaconChain`, so that its `Drop` instance // doesn't write a `PersistedBeaconChain` without the rest of the batch. + let head_tracker_reader = head_tracker.0.read(); self.pending_io_batch.push(BeaconChain::< Witness, >::persist_head_in_batch_standalone( - genesis_block_root, &head_tracker + genesis_block_root, &head_tracker_reader )); self.pending_io_batch.push(BeaconChain::< Witness, @@ -819,6 +820,7 @@ where .hot_db .do_atomically(self.pending_io_batch) .map_err(|e| format!("Error writing chain & metadata to disk: {:?}", e))?; + drop(head_tracker_reader); let genesis_validators_root = head_snapshot.beacon_state.genesis_validators_root(); let genesis_time = head_snapshot.beacon_state.genesis_time(); diff --git a/beacon_node/beacon_chain/src/head_tracker.rs b/beacon_node/beacon_chain/src/head_tracker.rs index 3fa577ff9..71e2473cd 100644 --- a/beacon_node/beacon_chain/src/head_tracker.rs +++ b/beacon_node/beacon_chain/src/head_tracker.rs @@ -1,4 +1,4 @@ -use parking_lot::RwLock; +use parking_lot::{RwLock, RwLockReadGuard}; use ssz_derive::{Decode, Encode}; use std::collections::HashMap; use types::{Hash256, Slot}; @@ -16,6 +16,8 @@ pub enum Error { #[derive(Default, Debug)] pub struct HeadTracker(pub RwLock>); +pub type HeadTrackerReader<'a> = RwLockReadGuard<'a, HashMap>; + impl HeadTracker { /// Register a block with `Self`, so it may or may not be included in a `Self::heads` call. /// @@ -44,6 +46,11 @@ impl HeadTracker { /// Returns a `SszHeadTracker`, which contains all necessary information to restore the state /// of `Self` at some later point. + /// + /// Should ONLY be used for tests, due to the potential for database races. + /// + /// See + #[cfg(test)] pub fn to_ssz_container(&self) -> SszHeadTracker { SszHeadTracker::from_map(&self.0.read()) }