Fix off-by-one bug in missed block detection (#5012)

* Regression test

* Fix the bug
This commit is contained in:
Michael Sproul 2023-12-15 14:23:54 +11:00 committed by GitHub
parent 366f0d7ac2
commit f1113540d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 102 additions and 16 deletions

View File

@ -442,7 +442,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
} }
/// Add some validators to `self` for additional monitoring. /// Add some validators to `self` for additional monitoring.
fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) { pub fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) {
let index_opt = self let index_opt = self
.indices .indices
.iter() .iter()
@ -602,8 +602,10 @@ impl<T: EthSpec> ValidatorMonitor<T> {
let end_slot = current_slot.saturating_sub(MISSED_BLOCK_LAG_SLOTS).as_u64(); let end_slot = current_slot.saturating_sub(MISSED_BLOCK_LAG_SLOTS).as_u64();
// List of proposers per epoch from the beacon_proposer_cache // List of proposers per epoch from the beacon_proposer_cache, and the epoch at which the
let mut proposers_per_epoch: Option<SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>> = None; // cache is valid.
let mut proposers_per_epoch: Option<(SmallVec<[usize; TYPICAL_SLOTS_PER_EPOCH]>, Epoch)> =
None;
for (prev_slot, slot) in (start_slot.as_u64()..=end_slot) for (prev_slot, slot) in (start_slot.as_u64()..=end_slot)
.map(Slot::new) .map(Slot::new)
@ -617,25 +619,30 @@ impl<T: EthSpec> ValidatorMonitor<T> {
// Found missed block // Found missed block
if block_root == prev_block_root { if block_root == prev_block_root {
let slot_epoch = slot.epoch(T::slots_per_epoch()); let slot_epoch = slot.epoch(T::slots_per_epoch());
let prev_slot_epoch = prev_slot.epoch(T::slots_per_epoch());
if let Ok(shuffling_decision_block) = if let Ok(shuffling_decision_block) =
state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root) state.proposer_shuffling_decision_root_at_epoch(slot_epoch, *block_root)
{ {
// Only update the cache if it needs to be initialised or because // Update the cache if it has not yet been initialised, or if it is
// slot is at epoch + 1 // initialised for a prior epoch. This is an optimisation to avoid bouncing
if proposers_per_epoch.is_none() || slot_epoch != prev_slot_epoch { // the proposer shuffling cache lock when there are lots of missed blocks.
proposers_per_epoch = self.get_proposers_by_epoch_from_cache( if proposers_per_epoch
slot_epoch, .as_ref()
shuffling_decision_block, .map_or(true, |(_, cached_epoch)| *cached_epoch != slot_epoch)
); {
proposers_per_epoch = self
.get_proposers_by_epoch_from_cache(
slot_epoch,
shuffling_decision_block,
)
.map(|cache| (cache, slot_epoch));
} }
// Only add missed blocks for the proposer if it's in the list of monitored validators // Only add missed blocks for the proposer if it's in the list of monitored validators
let slot_in_epoch = slot % T::slots_per_epoch(); let slot_in_epoch = slot % T::slots_per_epoch();
if let Some(proposer_index) = proposers_per_epoch if let Some(proposer_index) = proposers_per_epoch
.as_deref() .as_ref()
.and_then(|proposers| proposers.get(slot_in_epoch.as_usize())) .and_then(|(proposers, _)| proposers.get(slot_in_epoch.as_usize()))
{ {
let i = *proposer_index as u64; let i = *proposer_index as u64;
if let Some(pub_key) = self.indices.get(&i) { if let Some(pub_key) = self.indices.get(&i) {
@ -674,7 +681,8 @@ impl<T: EthSpec> ValidatorMonitor<T> {
debug!( debug!(
self.log, self.log,
"Could not get proposers from cache"; "Could not get proposers from cache";
"epoch" => ?slot_epoch "epoch" => ?slot_epoch,
"decision_root" => ?shuffling_decision_block,
); );
} }
} }

View File

@ -1,9 +1,9 @@
use lazy_static::lazy_static;
use beacon_chain::test_utils::{ use beacon_chain::test_utils::{
AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType, AttestationStrategy, BeaconChainHarness, BlockStrategy, EphemeralHarnessType,
}; };
use beacon_chain::validator_monitor::{ValidatorMonitorConfig, MISSED_BLOCK_LAG_SLOTS}; use beacon_chain::validator_monitor::{ValidatorMonitorConfig, MISSED_BLOCK_LAG_SLOTS};
use lazy_static::lazy_static;
use logging::test_logger;
use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot}; use types::{Epoch, EthSpec, Keypair, MainnetEthSpec, PublicKeyBytes, Slot};
// Should ideally be divisible by 3. // Should ideally be divisible by 3.
@ -23,6 +23,7 @@ fn get_harness(
let harness = BeaconChainHarness::builder(MainnetEthSpec) let harness = BeaconChainHarness::builder(MainnetEthSpec)
.default_spec() .default_spec()
.keypairs(KEYPAIRS[0..validator_count].to_vec()) .keypairs(KEYPAIRS[0..validator_count].to_vec())
.logger(test_logger())
.fresh_ephemeral_store() .fresh_ephemeral_store()
.mock_execution_layer() .mock_execution_layer()
.validator_monitor_config(ValidatorMonitorConfig { .validator_monitor_config(ValidatorMonitorConfig {
@ -39,6 +40,83 @@ fn get_harness(
harness harness
} }
// Regression test: a block missed in epoch N+1 must not be attributed using the
// proposer shuffling that was cached for epoch N (off-by-one in the cache reuse).
#[tokio::test]
async fn missed_blocks_across_epochs() {
    let slots_per_epoch = E::slots_per_epoch();
    let all_validators = (0..VALIDATOR_COUNT).collect::<Vec<_>>();

    let harness = get_harness(VALIDATOR_COUNT, vec![]);
    let validator_monitor = &harness.chain.validator_monitor;

    let mut genesis_state = harness.get_current_state();
    let genesis_state_root = genesis_state.update_tree_hash_cache().unwrap();
    let genesis_block_root = harness.head_block_root();

    // Miss one slot early in epoch 0 (this primes the per-epoch proposer cache
    // inside the missed-block check) and one at a *different* in-epoch offset in
    // epoch 1. The epoch-1 miss must not be resolved against the epoch-0 shuffling.
    let first_skip_offset = 3;
    let second_skip_offset = slots_per_epoch / 2;
    assert_ne!(first_skip_offset, second_skip_offset);
    let first_skip_slot = Slot::new(first_skip_offset);
    let second_skip_slot = Slot::new(slots_per_epoch + second_skip_offset);
    let slots = (1..2 * slots_per_epoch)
        .map(Slot::new)
        .filter(|slot| *slot != first_skip_slot && *slot != second_skip_slot)
        .collect::<Vec<_>>();

    let (block_roots_by_slot, state_roots_by_slot, _, head_state) = harness
        .add_attested_blocks_at_slots(genesis_state, genesis_state_root, &slots, &all_validators)
        .await;

    // Pre-populate the beacon proposer cache for both epochs so the monitor's
    // lookup path is exercised rather than the cache-miss path.
    let mut shuffling_cache = harness.chain.beacon_proposer_cache.lock();
    for epoch in (0..2).map(Epoch::new) {
        let start_slot = epoch.start_slot(slots_per_epoch) + 1;
        let state = harness
            .get_hot_state(state_roots_by_slot[&start_slot])
            .unwrap();
        let decision_root = state
            .proposer_shuffling_decision_root(genesis_block_root)
            .unwrap();
        shuffling_cache
            .insert(
                epoch,
                decision_root,
                state
                    .get_beacon_proposer_indices(&harness.chain.spec)
                    .unwrap(),
                state.fork(),
            )
            .unwrap();
    }
    drop(shuffling_cache);

    // Monitor the validator that proposed the epoch-0 block sharing the same
    // in-epoch offset as the epoch-1 skip: buggy cache reuse would blame it for
    // the epoch-1 miss.
    let innocent_proposer_slot = Slot::new(second_skip_offset);
    let innocent_proposer = harness
        .get_block(block_roots_by_slot[&innocent_proposer_slot])
        .unwrap()
        .message()
        .proposer_index();

    let mut monitor = validator_monitor.write();

    // First pass registers the validator indices with the monitor.
    monitor.process_valid_state(head_state.current_epoch(), &head_state, &harness.chain.spec);

    // Start monitoring the innocent validator.
    monitor.add_validator_pubkey(KEYPAIRS[innocent_proposer as usize].pk.compress());

    // Second pass runs the missed-block detection.
    monitor.process_valid_state(head_state.current_epoch(), &head_state, &harness.chain.spec);

    // The monitored validator actually proposed its block, so its missed-block
    // count must be zero.
    assert_eq!(
        monitor.get_monitored_validator_missed_block_count(innocent_proposer),
        0
    );
}
#[tokio::test] #[tokio::test]
async fn produces_missed_blocks() { async fn produces_missed_blocks() {
let validator_count = 16; let validator_count = 16;