Fix off-by-one bug in missed block detection (#5012)

* Regression test

* Fix the bug
This commit is contained in:
Michael Sproul 2023-12-15 14:23:54 +11:00 committed by GitHub
parent 366f0d7ac2
commit f1113540d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 102 additions and 16 deletions

View File

@ -442,7 +442,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
} }
/// Add some validators to `self` for additional monitoring. /// Add some validators to `self` for additional monitoring.
fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) { pub fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) {
let index_opt = self let index_opt = self
.indices .indices
.iter() .iter()
@ -602,8 +602,10 @@ impl<T: EthSpec> ValidatorMonitor<T> {
let end_slot = current_slot.saturating_sub(MISSED_BLOCK_LAG_SLOTS).as_u64(); let end_slot = current_slot.saturating_sub(MISSED_BLOCK_LAG_SLOTS).as_u64();
// List of proposers per epoch from the beacon_proposer_cache // List of proposers per epoch from the beacon_proposer_cache, and the epoch at which the
let mut proposers_per_epoch: Option<SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>> = None; // cache is valid.
let mut proposers_per_epoch: Option<(SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>, Epoch)> =
None;
for (prev_slot, slot) in (start_slot.as_u64()..=end_slot) for (prev_slot, slot) in (start_slot.as_u64()..=end_slot)
.map(Slot::new) .map(Slot::new)
@ -617,25 +619,30 @@ impl<T: EthSpec> ValidatorMonitor<T> {
// Found missed block // Found missed block
if block_root == prev_block_root { if block_root == prev_block_root {
let slot_epoch = slot.epoch(T::slots_per_epoch()); let slot_epoch = slot.epoch(T::slots_per_epoch());
let prev_slot_epoch = prev_slot.epoch(T::slots_per_epoch());
if let Ok(shuffling_decision_block) = if let Ok(shuffling_decision_block) =
state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root) state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root)
{ {
// Only update the cache if it needs to be initialised or because // Update the cache if it has not yet been initialised, or if it is
// slot is at epoch + 1 // initialised for a prior epoch. This is an optimisation to avoid bouncing
if proposers_per_epoch.is_none() || slot_epoch != prev_slot_epoch { // the proposer shuffling cache lock when there are lots of missed blocks.
proposers_per_epoch = self.get_proposers_by_epoch_from_cache( if proposers_per_epoch
slot_epoch, .as_ref()
shuffling_decision_block, .map_or(true, |(_, cached_epoch)| *cached_epoch != slot_epoch)
); {
proposers_per_epoch = self
.get_proposers_by_epoch_from_cache(
slot_epoch,
shuffling_decision_block,
)
.map(|cache| (cache, slot_epoch));
} }
// Only add missed blocks for the proposer if it's in the list of monitored validators // Only add missed blocks for the proposer if it's in the list of monitored validators
let slot_in_epoch = slot % T::slots_per_epoch(); let slot_in_epoch = slot % T::slots_per_epoch();
if let Some(proposer_index) = proposers_per_epoch if let Some(proposer_index) = proposers_per_epoch
.as_deref() .as_ref()
.and_then(|proposers| proposers.get(slot_in_epoch.as_usize())) .and_then(|(proposers, _)| proposers.get(slot_in_epoch.as_usize()))
{ {
let i = *proposer_index as u64; let i = *proposer_index as u64;
if let Some(pub_key) = self.indices.get(&i) { if let Some(pub_key) = self.indices.get(&i) {
@ -674,7 +681,8 @@ impl<T: EthSpec> ValidatorMonitor<T> {
debug!( debug!(
self.log, self.log,
"Could not get proposers from cache"; "Could not get proposers from cache";
"epoch" => ?slot_epoch "epoch" => ?slot_epoch,
"decision_root" => ?shuffling_decision_block,
); );
} }
} }

View File

@ -1,9 +1,9 @@
use lazy_static::lazy_static;
use beacon_chain::test_utils::{ use beacon_chain::test_utils::{
AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType, AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType,
}; };
use beacon_chain::validator_monitor::{ValidatorMonitorConfig, MISSED_BLOCK_LAG_SLOTS}; use beacon_chain::validator_monitor::{ValidatorMonitorConfig, MISSED_BLOCK_LAG_SLOTS};
use lazy_static::lazy_static;
use logging::test_logger;
use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot};
// Should ideally be divisible by 3. // Should ideally be divisible by 3.
@ -23,6 +23,7 @@ fn get_harness(
let harness = BeaconChainHarness::builder(MainnetEthSpec) let harness = BeaconChainHarness::builder(MainnetEthSpec)
.default_spec() .default_spec()
.keypairs(KEYPAIRS[0..validator_count].to_vec()) .keypairs(KEYPAIRS[0..validator_count].to_vec())
.logger(test_logger())
.fresh_ephemeral_store() .fresh_ephemeral_store()
.mock_execution_layer() .mock_execution_layer()
.validator_monitor_config(ValidatorMonitorConfig { .validator_monitor_config(ValidatorMonitorConfig {
@ -39,6 +40,83 @@ fn get_harness(
harness harness
} }
// Regression test: a block missed in epoch N+1 must not be attributed using the
// proposer shuffling that was cached for epoch N (off-by-one in the cache reuse).
#[tokio::test]
async fn missed_blocks_across_epochs() {
    let slots_per_epoch = E::slots_per_epoch();
    let all_validators = (0..VALIDATOR_COUNT).collect::<Vec<_>>();

    let harness = get_harness(VALIDATOR_COUNT, vec![]);
    let validator_monitor = &harness.chain.validator_monitor;

    let mut genesis_state = harness.get_current_state();
    let genesis_state_root = genesis_state.update_tree_hash_cache().unwrap();
    let genesis_block_root = harness.head_block_root();

    // Miss one slot early in epoch 0 (this primes the per-epoch proposer cache
    // inside the missed-block check) and one at a *different* in-epoch offset in
    // epoch 1. The epoch-1 miss must not be resolved against the epoch-0 shuffling.
    let first_skip_offset = 3;
    let second_skip_offset = slots_per_epoch / 2;
    assert_ne!(first_skip_offset, second_skip_offset);
    let first_skip_slot = Slot::new(first_skip_offset);
    let second_skip_slot = Slot::new(slots_per_epoch + second_skip_offset);
    let slots = (1..2 * slots_per_epoch)
        .map(Slot::new)
        .filter(|slot| *slot != first_skip_slot && *slot != second_skip_slot)
        .collect::<Vec<_>>();

    let (block_roots_by_slot, state_roots_by_slot, _, head_state) = harness
        .add_attested_blocks_at_slots(genesis_state, genesis_state_root, &slots, &all_validators)
        .await;

    // Pre-populate the beacon proposer cache for both epochs so the monitor's
    // lookup path is exercised rather than the cache-miss path.
    let mut shuffling_cache = harness.chain.beacon_proposer_cache.lock();
    for epoch in (0..2).map(Epoch::new) {
        let start_slot = epoch.start_slot(slots_per_epoch) + 1;
        let state = harness
            .get_hot_state(state_roots_by_slot[&start_slot])
            .unwrap();
        let decision_root = state
            .proposer_shuffling_decision_root(genesis_block_root)
            .unwrap();
        shuffling_cache
            .insert(
                epoch,
                decision_root,
                state
                    .get_beacon_proposer_indices(&harness.chain.spec)
                    .unwrap(),
                state.fork(),
            )
            .unwrap();
    }
    drop(shuffling_cache);

    // Monitor the validator that proposed the epoch-0 block sharing the same
    // in-epoch offset as the epoch-1 skip: buggy cache reuse would blame it for
    // the epoch-1 miss.
    let innocent_proposer_slot = Slot::new(second_skip_offset);
    let innocent_proposer = harness
        .get_block(block_roots_by_slot[&innocent_proposer_slot])
        .unwrap()
        .message()
        .proposer_index();

    let mut monitor = validator_monitor.write();

    // First pass registers the validator indices with the monitor.
    monitor.process_valid_state(head_state.current_epoch(), &head_state, &harness.chain.spec);

    // Start monitoring the innocent validator.
    monitor.add_validator_pubkey(KEYPAIRS[innocent_proposer as usize].pk.compress());

    // Second pass runs the missed-block detection.
    monitor.process_valid_state(head_state.current_epoch(), &head_state, &harness.chain.spec);

    // The monitored validator actually proposed its block, so its missed-block
    // count must be zero.
    assert_eq!(
        monitor.get_monitored_validator_missed_block_count(innocent_proposer),
        0
    );
}
#[tokio::test] #[tokio::test]
async fn produces_missed_blocks() { async fn produces_missed_blocks() {
let validator_count = 16; let validator_count = 16;