Fix corrupted DB on networks where the first slot is skipped (Holesky) (#4985)

* Fix zero block roots on skip slots.

* Remove temporary comment, println code and unused imports.

* Remove `println!` in test.
This commit is contained in:
Jimmy Chen 2023-12-07 15:12:06 +11:00 committed by GitHub
parent 8ba39cbf2c
commit 67e0569d9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 135 additions and 14 deletions

View File

@ -46,7 +46,8 @@ pub fn upgrade_to_v18<T: BeaconChainTypes>(
db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>, db: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
log: Logger, log: Logger,
) -> Result<Vec<KeyValueStoreOp>, Error> { ) -> Result<Vec<KeyValueStoreOp>, Error> {
db.heal_freezer_block_roots()?; db.heal_freezer_block_roots_at_split()?;
db.heal_freezer_block_roots_at_genesis()?;
info!(log, "Healed freezer block roots"); info!(log, "Healed freezer block roots");
// No-op, even if Deneb has already occurred. The database is probably borked in this case, but // No-op, even if Deneb has already occurred. The database is probably borked in this case, but

View File

@ -26,6 +26,7 @@ use std::collections::HashSet;
use std::convert::TryInto; use std::convert::TryInto;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use store::chunked_vector::Chunk;
use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN}; use store::metadata::{SchemaVersion, CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN};
use store::{ use store::{
chunked_vector::{chunk_key, Field}, chunked_vector::{chunk_key, Field},
@ -106,10 +107,10 @@ fn get_harness_generic(
harness harness
} }
/// Tests that `store.heal_freezer_block_roots` inserts block roots between last restore point /// Tests that `store.heal_freezer_block_roots_at_split` inserts block roots between last restore point
/// slot and the split slot. /// slot and the split slot.
#[tokio::test] #[tokio::test]
async fn heal_freezer_block_roots() { async fn heal_freezer_block_roots_at_split() {
// chunk_size is hard-coded to 128 // chunk_size is hard-coded to 128
let num_blocks_produced = E::slots_per_epoch() * 20; let num_blocks_produced = E::slots_per_epoch() * 20;
let db_path = tempdir().unwrap(); let db_path = tempdir().unwrap();
@ -136,7 +137,7 @@ async fn heal_freezer_block_roots() {
// Do a heal before deleting to make sure that it doesn't break. // Do a heal before deleting to make sure that it doesn't break.
let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch()); let last_restore_point_slot = Slot::new(16 * E::slots_per_epoch());
store.heal_freezer_block_roots().unwrap(); store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);
// Delete block roots between `last_restore_point_slot` and `split_slot`. // Delete block roots between `last_restore_point_slot` and `split_slot`.
@ -164,7 +165,7 @@ async fn heal_freezer_block_roots() {
assert!(matches!(block_root_err, store::Error::NoContinuationData)); assert!(matches!(block_root_err, store::Error::NoContinuationData));
// Re-insert block roots // Re-insert block roots
store.heal_freezer_block_roots().unwrap(); store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);
// Run for another two epochs to check that the invariant is maintained. // Run for another two epochs to check that the invariant is maintained.
@ -243,7 +244,7 @@ async fn heal_freezer_block_roots_with_skip_slots() {
assert!(matches!(block_root_err, store::Error::NoContinuationData)); assert!(matches!(block_root_err, store::Error::NoContinuationData));
// heal function // heal function
store.heal_freezer_block_roots().unwrap(); store.heal_freezer_block_roots_at_split().unwrap();
check_freezer_block_roots(&harness, last_restore_point_slot, split_slot); check_freezer_block_roots(&harness, last_restore_point_slot, split_slot);
// Run for another two epochs to check that the invariant is maintained. // Run for another two epochs to check that the invariant is maintained.
@ -257,12 +258,84 @@ async fn heal_freezer_block_roots_with_skip_slots() {
check_iterators(&harness); check_iterators(&harness);
} }
fn check_freezer_block_roots( /// Tests that `store.heal_freezer_block_roots_at_genesis` replaces 0x0 block roots between slot
harness: &TestHarness, /// 0 and the first non-skip slot with genesis block root.
last_restore_point_slot: Slot, #[tokio::test]
split_slot: Slot, async fn heal_freezer_block_roots_at_genesis() {
) { // Run for a few epochs to ensure we're past finalization.
for slot in (last_restore_point_slot.as_u64()..split_slot.as_u64()).map(Slot::new) { let num_blocks_produced = E::slots_per_epoch() * 4;
let db_path = tempdir().unwrap();
let store = get_store(&db_path);
let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT);
// Start with 2 skip slots.
harness.advance_slot();
harness.advance_slot();
harness
.extend_chain(
num_blocks_produced as usize,
BlockStrategy::OnCanonicalHead,
AttestationStrategy::AllValidators,
)
.await;
// Do a heal before deleting to make sure that it doesn't break.
store.heal_freezer_block_roots_at_genesis().unwrap();
check_freezer_block_roots(
&harness,
Slot::new(0),
Epoch::new(1).end_slot(E::slots_per_epoch()),
);
// Write 0x0 block roots at slot 1 and slot 2.
let chunk_index = 0;
let chunk_db_key = chunk_key(chunk_index);
let mut chunk =
Chunk::<Hash256>::load(&store.cold_db, DBColumn::BeaconBlockRoots, &chunk_db_key)
.unwrap()
.unwrap();
chunk.values[1] = Hash256::zero();
chunk.values[2] = Hash256::zero();
let mut ops = vec![];
chunk
.store(DBColumn::BeaconBlockRoots, &chunk_db_key, &mut ops)
.unwrap();
store.cold_db.do_atomically(ops).unwrap();
// Ensure the DB is corrupted
let block_roots = store
.forwards_block_roots_iterator_until(
Slot::new(1),
Slot::new(2),
|| unreachable!(),
&harness.chain.spec,
)
.unwrap()
.map(Result::unwrap)
.take(2)
.collect::<Vec<_>>();
assert_eq!(
block_roots,
vec![
(Hash256::zero(), Slot::new(1)),
(Hash256::zero(), Slot::new(2))
]
);
// Insert genesis block roots at skip slots before first block slot
store.heal_freezer_block_roots_at_genesis().unwrap();
check_freezer_block_roots(
&harness,
Slot::new(0),
Epoch::new(1).end_slot(E::slots_per_epoch()),
);
}
fn check_freezer_block_roots(harness: &TestHarness, start_slot: Slot, end_slot: Slot) {
for slot in (start_slot.as_u64()..end_slot.as_u64()).map(Slot::new) {
let (block_root, result_slot) = harness let (block_root, result_slot) = harness
.chain .chain
.store .store

View File

@ -2216,7 +2216,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
/// This function fills in missing block roots between last restore point slot and split /// This function fills in missing block roots between last restore point slot and split
/// slot, if any. /// slot, if any.
pub fn heal_freezer_block_roots(&self) -> Result<(), Error> { pub fn heal_freezer_block_roots_at_split(&self) -> Result<(), Error> {
let split = self.get_split_info(); let split = self.get_split_info();
let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point let last_restore_point_slot = (split.slot - 1) / self.config.slots_per_restore_point
* self.config.slots_per_restore_point; * self.config.slots_per_restore_point;
@ -2245,6 +2245,53 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
Ok(()) Ok(())
} }
pub fn heal_freezer_block_roots_at_genesis(&self) -> Result<(), Error> {
let oldest_block_slot = self.get_oldest_block_slot();
let split_slot = self.get_split_slot();
// Check if backfill has been completed AND the freezer db has data in it
if oldest_block_slot != 0 || split_slot == 0 {
return Ok(());
}
let mut block_root_iter = self.forwards_block_roots_iterator_until(
Slot::new(0),
split_slot - 1,
|| {
Err(Error::DBError {
message: "Should not require end state".to_string(),
})
},
&self.spec,
)?;
let (genesis_block_root, _) = block_root_iter.next().ok_or_else(|| Error::DBError {
message: "Genesis block root missing".to_string(),
})??;
let slots_to_fix = itertools::process_results(block_root_iter, |iter| {
iter.take_while(|(block_root, _)| block_root.is_zero())
.map(|(_, slot)| slot)
.collect::<Vec<_>>()
})?;
let Some(first_slot) = slots_to_fix.first() else {
return Ok(());
};
let mut chunk_writer =
ChunkWriter::<BlockRoots, _, _>::new(&self.cold_db, first_slot.as_usize())?;
let mut ops = vec![];
for slot in slots_to_fix {
chunk_writer.set(slot.as_usize(), genesis_block_root, &mut ops)?;
}
chunk_writer.write(&mut ops)?;
self.cold_db.do_atomically(ops)?;
Ok(())
}
/// Delete *all* states from the freezer database and update the anchor accordingly. /// Delete *all* states from the freezer database and update the anchor accordingly.
/// ///
/// WARNING: this method deletes the genesis state and replaces it with the provided /// WARNING: this method deletes the genesis state and replaces it with the provided
@ -2257,7 +2304,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
genesis_state: &BeaconState<E>, genesis_state: &BeaconState<E>,
) -> Result<(), Error> { ) -> Result<(), Error> {
// Make sure there is no missing block roots before pruning // Make sure there is no missing block roots before pruning
self.heal_freezer_block_roots()?; self.heal_freezer_block_roots_at_split()?;
// Update the anchor to use the dummy state upper limit and disable historic state storage. // Update the anchor to use the dummy state upper limit and disable historic state storage.
let old_anchor = self.get_anchor_info(); let old_anchor = self.get_anchor_info();