From bd39cc8e26ccb9566b03c8363d2f391daafaa1b0 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 22 Sep 2020 02:06:10 +0000 Subject: [PATCH] Apply hotfix for inconsistent head (#1639) ## Issue Addressed - Resolves #1616 ## Proposed Changes If we look at the function which persists fork choice and the canonical head to disk: https://github.com/sigp/lighthouse/blob/1db8daae0c7bb34bf2e05644fa6bf313c2bea98e/beacon_node/beacon_chain/src/beacon_chain.rs#L234-L280 There is a race-condition which might cause the canonical head and fork choice values to be out-of-sync. I believe this is the cause of #1616. I managed to recreate the issue and produce a database that was unable to sync under the `master` branch but able to sync with this branch. These new changes solve the issue by ignoring the persisted `canonical_head_block_root` value and instead getting fork choice to generate it. This ensures that the canonical head is in-sync with fork choice. ## Additional Info This is hotfix method that leaves some crusty code hanging around. Once this PR is merged (to satisfy the v0.2.x users) we should later update and merge #1638 so we can have a clean fix for the v0.3.x versions. --- beacon_node/beacon_chain/src/builder.rs | 192 ++++++++++-------- .../src/persisted_beacon_chain.rs | 7 + .../beacon_chain/tests/persistence_tests.rs | 7 +- beacon_node/client/src/builder.rs | 7 +- consensus/fork_choice/src/fork_choice.rs | 8 +- 5 files changed, 129 insertions(+), 92 deletions(-) diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 6ff94cfaa..ac9d1c4b1 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -97,11 +97,12 @@ pub struct BeaconChainBuilder { #[allow(clippy::type_complexity)] store: Option>>, store_migrator: Option, - canonical_head: Option>, - /// The finalized checkpoint to anchor the chain. May be genesis or a higher - /// checkpoint. - pub finalized_snapshot: Option>, + pub genesis_time: Option, genesis_block_root: Option, + #[allow(clippy::type_complexity)] + fork_choice: Option< + ForkChoice, T::EthSpec>, + >, op_pool: Option>, eth1_chain: Option>, event_handler: Option, @@ -146,9 +147,9 @@ where Self { store: None, store_migrator: None, - canonical_head: None, - finalized_snapshot: None, + genesis_time: None, genesis_block_root: None, + fork_choice: None, op_pool: None, eth1_chain: None, event_handler: None, @@ -278,22 +279,31 @@ where .to_string() })?; - self.genesis_block_root = Some(chain.genesis_block_root); - self.head_tracker = Some( - HeadTracker::from_ssz_container(&chain.ssz_head_tracker) - .map_err(|e| format!("Failed to decode head tracker for database: {:?}", e))?, - ); + let persisted_fork_choice = store + .get_item::(&Hash256::from_slice(&FORK_CHOICE_DB_KEY)) + .map_err(|e| format!("DB error when reading persisted fork choice: {:?}", e))? + .ok_or_else(|| "No persisted fork choice present in database.".to_string())?; - let head_block_root = chain.canonical_head_block_root; - let head_block = store - .get_item::>(&head_block_root) - .map_err(|e| format!("DB error when reading head block: {:?}", e))? - .ok_or_else(|| "Head block not found in store".to_string())?; - let head_state_root = head_block.state_root(); - let head_state = store - .get_state(&head_state_root, Some(head_block.slot())) - .map_err(|e| format!("DB error when reading head state: {:?}", e))? - .ok_or_else(|| "Head state not found in store".to_string())?; + let fc_store = BeaconForkChoiceStore::from_persisted( + persisted_fork_choice.fork_choice_store, + store.clone(), + ) + .map_err(|e| format!("Unable to load ForkChoiceStore: {:?}", e))?; + + let fork_choice = + ForkChoice::from_persisted(persisted_fork_choice.fork_choice, fc_store) + .map_err(|e| format!("Unable to parse persisted fork choice from disk: {:?}", e))?; + + let genesis_block = store + .get_item::>(&chain.genesis_block_root) + .map_err(|e| format!("DB error when reading genesis block: {:?}", e))? + .ok_or_else(|| "Genesis block not found in store".to_string())?; + let genesis_state = store + .get_state(&genesis_block.state_root(), Some(genesis_block.slot())) + .map_err(|e| format!("DB error when reading genesis state: {:?}", e))? + .ok_or_else(|| "Genesis block not found in store".to_string())?; + + self.genesis_time = Some(genesis_state.genesis_time); self.op_pool = Some( store @@ -303,35 +313,16 @@ where .unwrap_or_else(OperationPool::new), ); - let finalized_block_root = head_state.finalized_checkpoint.root; - let finalized_block = store - .get_item::>(&finalized_block_root) - .map_err(|e| format!("DB error when reading finalized block: {:?}", e))? - .ok_or_else(|| "Finalized block not found in store".to_string())?; - let finalized_state_root = finalized_block.state_root(); - let finalized_state = store - .get_state(&finalized_state_root, Some(finalized_block.slot())) - .map_err(|e| format!("DB error when reading finalized state: {:?}", e))? - .ok_or_else(|| "Finalized state not found in store".to_string())?; - - self.finalized_snapshot = Some(BeaconSnapshot { - beacon_block_root: finalized_block_root, - beacon_block: finalized_block, - beacon_state_root: finalized_state_root, - beacon_state: finalized_state, - }); - - self.canonical_head = Some(BeaconSnapshot { - beacon_block_root: head_block_root, - beacon_block: head_block, - beacon_state_root: head_state_root, - beacon_state: head_state, - }); - let pubkey_cache = ValidatorPubkeyCache::load_from_file(pubkey_cache_path) .map_err(|e| format!("Unable to open persisted pubkey cache: {:?}", e))?; + self.genesis_block_root = Some(chain.genesis_block_root); + self.head_tracker = Some( + HeadTracker::from_ssz_container(&chain.ssz_head_tracker) + .map_err(|e| format!("Failed to decode head tracker for database: {:?}", e))?, + ); self.validator_pubkey_cache = Some(pubkey_cache); + self.fork_choice = Some(fork_choice); Ok(self) } @@ -374,12 +365,20 @@ where ) })?; - self.finalized_snapshot = Some(BeaconSnapshot { + let genesis = BeaconSnapshot { beacon_block_root, beacon_block, beacon_state_root, beacon_state, - }); + }; + + let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store, &genesis); + + let fork_choice = ForkChoice::from_genesis(fc_store, &genesis.beacon_block.message) + .map_err(|e| format!("Unable to build initialize ForkChoice: {:?}", e))?; + + self.fork_choice = Some(fork_choice); + self.genesis_time = Some(genesis.beacon_state.genesis_time); Ok(self.empty_op_pool()) } @@ -457,23 +456,73 @@ where .store .clone() .ok_or_else(|| "Cannot build without a store.".to_string())?; + let mut fork_choice = self + .fork_choice + .ok_or_else(|| "Cannot build without fork choice.".to_string())?; + let genesis_block_root = self + .genesis_block_root + .ok_or_else(|| "Cannot build without a genesis block root".to_string())?; - // If this beacon chain is being loaded from disk, use the stored head. Otherwise, just use - // the finalized checkpoint (which is probably genesis). - let mut canonical_head = if let Some(head) = self.canonical_head { - head + let current_slot = if slot_clock + .is_prior_to_genesis() + .ok_or_else(|| "Unable to read slot clock".to_string())? + { + self.spec.genesis_slot } else { - self.finalized_snapshot - .ok_or_else(|| "Cannot build without a state".to_string())? + slot_clock + .now() + .ok_or_else(|| "Unable to read slot".to_string())? }; + let head_block_root = fork_choice + .get_head(current_slot) + .map_err(|e| format!("Unable to get fork choice head: {:?}", e))?; + + let head_block = store + .get_item::>(&head_block_root) + .map_err(|e| format!("DB error when reading head block: {:?}", e))? + .ok_or_else(|| "Head block not found in store".to_string())?; + let head_state_root = head_block.state_root(); + let head_state = store + .get_state(&head_state_root, Some(head_block.slot())) + .map_err(|e| format!("DB error when reading head state: {:?}", e))? + .ok_or_else(|| "Head state not found in store".to_string())?; + + let mut canonical_head = BeaconSnapshot { + beacon_block_root: head_block_root, + beacon_block: head_block, + beacon_state_root: head_state_root, + beacon_state: head_state, + }; + + if canonical_head.beacon_block.state_root() != canonical_head.beacon_state_root { + return Err("beacon_block.state_root != beacon_state".to_string()); + } + canonical_head .beacon_state .build_all_caches(&self.spec) .map_err(|e| format!("Failed to build state caches: {:?}", e))?; - if canonical_head.beacon_block.state_root() != canonical_head.beacon_state_root { - return Err("beacon_block.state_root != beacon_state".to_string()); + // Perform a check to ensure that the finalization points of the head and fork choice are + // consistent. + // + // This is a sanity check to detect database corruption. + let fc_finalized = fork_choice.finalized_checkpoint(); + let head_finalized = canonical_head.beacon_state.finalized_checkpoint; + if fc_finalized != head_finalized { + if head_finalized.root == Hash256::zero() + && head_finalized.epoch == fc_finalized.epoch + && fc_finalized.root == genesis_block_root + { + // This is a legal edge-case encountered during genesis. + } else { + return Err(format!( + "Database corrupt: fork choice is finalized at {:?} whilst head is finalized at \ + {:?}", + fc_finalized, head_finalized + )); + } } let pubkey_cache_path = self @@ -485,26 +534,6 @@ where .map_err(|e| format!("Unable to init validator pubkey cache: {:?}", e)) })?; - let persisted_fork_choice = store - .get_item::(&Hash256::from_slice(&FORK_CHOICE_DB_KEY)) - .map_err(|e| format!("DB error when reading persisted fork choice: {:?}", e))?; - - let fork_choice = if let Some(persisted) = persisted_fork_choice { - let fc_store = - BeaconForkChoiceStore::from_persisted(persisted.fork_choice_store, store.clone()) - .map_err(|e| format!("Unable to load ForkChoiceStore: {:?}", e))?; - - ForkChoice::from_persisted(persisted.fork_choice, fc_store) - .map_err(|e| format!("Unable to parse persisted fork choice from disk: {:?}", e))? - } else { - let genesis = &canonical_head; - - let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store.clone(), genesis); - - ForkChoice::from_genesis(fc_store, &genesis.beacon_block.message) - .map_err(|e| format!("Unable to build initialize ForkChoice: {:?}", e))? - }; - let beacon_chain = BeaconChain { spec: self.spec, config: self.chain_config, @@ -533,9 +562,7 @@ where eth1_chain: self.eth1_chain, genesis_validators_root: canonical_head.beacon_state.genesis_validators_root, canonical_head: TimeoutRwLock::new(canonical_head.clone()), - genesis_block_root: self - .genesis_block_root - .ok_or_else(|| "Cannot build without a genesis block root".to_string())?, + genesis_block_root, fork_choice: RwLock::new(fork_choice), event_handler: self .event_handler @@ -634,11 +661,8 @@ where /// Requires the state to be initialized. pub fn testing_slot_clock(self, slot_duration: Duration) -> Result { let genesis_time = self - .finalized_snapshot - .as_ref() - .ok_or_else(|| "testing_slot_clock requires an initialized state")? - .beacon_state - .genesis_time; + .genesis_time + .ok_or_else(|| "testing_slot_clock requires an initialized state")?; let slot_clock = TestingSlotClock::new( Slot::new(0), diff --git a/beacon_node/beacon_chain/src/persisted_beacon_chain.rs b/beacon_node/beacon_chain/src/persisted_beacon_chain.rs index 2ca29e9ed..dcdf710d6 100644 --- a/beacon_node/beacon_chain/src/persisted_beacon_chain.rs +++ b/beacon_node/beacon_chain/src/persisted_beacon_chain.rs @@ -6,6 +6,13 @@ use types::Hash256; #[derive(Clone, Encode, Decode)] pub struct PersistedBeaconChain { + /// This value is ignored to resolve the issue described here: + /// + /// https://github.com/sigp/lighthouse/pull/1639 + /// + /// The following PR will clean-up and remove this field: + /// + /// https://github.com/sigp/lighthouse/pull/1638 pub canonical_head_block_root: Hash256, pub genesis_block_root: Hash256, pub ssz_head_tracker: SszHeadTracker, diff --git a/beacon_node/beacon_chain/tests/persistence_tests.rs b/beacon_node/beacon_chain/tests/persistence_tests.rs index b5bc3cae1..1666f34bb 100644 --- a/beacon_node/beacon_chain/tests/persistence_tests.rs +++ b/beacon_node/beacon_chain/tests/persistence_tests.rs @@ -153,8 +153,11 @@ fn assert_chains_pretty_much_the_same(a: &BeaconChain, b a.genesis_block_root, b.genesis_block_root, "genesis_block_root should be equal" ); + + let slot = a.slot().unwrap(); assert!( - *a.fork_choice.read() == *b.fork_choice.read(), - "fork_choice should be equal" + a.fork_choice.write().get_head(slot).unwrap() + == b.fork_choice.write().get_head(slot).unwrap(), + "fork_choice heads should be equal" ); } diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index c77ddecef..16e7a0264 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -713,11 +713,8 @@ where .ok_or_else(|| "system_time_slot_clock requires a beacon_chain_builder")?; let genesis_time = beacon_chain_builder - .finalized_snapshot - .as_ref() - .ok_or_else(|| "system_time_slot_clock requires an initialized beacon state")? - .beacon_state - .genesis_time; + .genesis_time + .ok_or_else(|| "system_time_slot_clock requires an initialized beacon state")?; let spec = self .chain_spec diff --git a/consensus/fork_choice/src/fork_choice.rs b/consensus/fork_choice/src/fork_choice.rs index 804771330..99f998e55 100644 --- a/consensus/fork_choice/src/fork_choice.rs +++ b/consensus/fork_choice/src/fork_choice.rs @@ -3,7 +3,8 @@ use std::marker::PhantomData; use proto_array::{Block as ProtoBlock, ProtoArrayForkChoice}; use ssz_derive::{Decode, Encode}; use types::{ - BeaconBlock, BeaconState, BeaconStateError, Epoch, EthSpec, Hash256, IndexedAttestation, Slot, + BeaconBlock, BeaconState, BeaconStateError, Checkpoint, Epoch, EthSpec, Hash256, + IndexedAttestation, Slot, }; use crate::ForkChoiceStore; @@ -758,6 +759,11 @@ where .is_descendant(self.fc_store.finalized_checkpoint().root, block_root) } + /// Return the current finalized checkpoint. + pub fn finalized_checkpoint(&self) -> Checkpoint { + *self.fc_store.finalized_checkpoint() + } + /// Returns the latest message for a given validator, if any. /// /// Returns `(block_root, block_slot)`.