From 9246a92d76bb06c0b61adefc87f5e3467eb22c03 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Fri, 23 Sep 2022 03:52:44 +0000 Subject: [PATCH] Make garbage collection test less failure prone (#3599) ## Issue Addressed NA ## Proposed Changes This PR attempts to fix the following spurious CI failure: ``` ---- store_tests::garbage_collect_temp_states_from_failed_block stdout ---- thread 'store_tests::garbage_collect_temp_states_from_failed_block' panicked at 'disk store should initialize: DBError { message: "Error { message: \"IO error: lock /tmp/.tmp6DcBQ9/cold_db/LOCK: already held by process\" }" }', beacon_node/beacon_chain/tests/store_tests.rs:59:10 ``` I believe that some async task is taking a clone of the store and holding it in some other thread for a short time. This creates a race-condition when we try to open a new instance of the store. ## Additional Info NA --- beacon_node/beacon_chain/tests/store_tests.rs | 75 +++++++++++-------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 2fcd74be4..883b871b1 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -26,6 +26,7 @@ use store::{ HotColdDB, LevelDB, StoreConfig, }; use tempfile::{tempdir, TempDir}; +use tokio::time::sleep; use tree_hash::TreeHash; use types::test_utils::{SeedableRng, XorShiftRng}; use types::*; @@ -1985,45 +1986,55 @@ async fn pruning_test( check_no_blocks_exist(&harness, stray_blocks.values()); } -#[test] -fn garbage_collect_temp_states_from_failed_block() { +#[tokio::test] +async fn garbage_collect_temp_states_from_failed_block() { let db_path = tempdir().unwrap(); - let store = get_store(&db_path); - let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - // Use a `block_on_dangerous` rather than an async test to stop spawned processes from holding - // a reference to the store. - harness.chain.task_executor.clone().block_on_dangerous( - async move { - let slots_per_epoch = E::slots_per_epoch(); + // Wrap these functions to ensure the variables are dropped before we try to open another + // instance of the store. + let mut store = { + let store = get_store(&db_path); + let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - let genesis_state = harness.get_current_state(); - let block_slot = Slot::new(2 * slots_per_epoch); - let (signed_block, state) = harness.make_block(genesis_state, block_slot).await; + let slots_per_epoch = E::slots_per_epoch(); - let (mut block, _) = signed_block.deconstruct(); + let genesis_state = harness.get_current_state(); + let block_slot = Slot::new(2 * slots_per_epoch); + let (signed_block, state) = harness.make_block(genesis_state, block_slot).await; - // Mutate the block to make it invalid, and re-sign it. - *block.state_root_mut() = Hash256::repeat_byte(0xff); - let proposer_index = block.proposer_index() as usize; - let block = block.sign( - &harness.validator_keypairs[proposer_index].sk, - &state.fork(), - state.genesis_validators_root(), - &harness.spec, - ); + let (mut block, _) = signed_block.deconstruct(); - // The block should be rejected, but should store a bunch of temporary states. - harness.set_current_slot(block_slot); - harness.process_block_result(block).await.unwrap_err(); + // Mutate the block to make it invalid, and re-sign it. + *block.state_root_mut() = Hash256::repeat_byte(0xff); + let proposer_index = block.proposer_index() as usize; + let block = block.sign( + &harness.validator_keypairs[proposer_index].sk, + &state.fork(), + state.genesis_validators_root(), + &harness.spec, + ); - assert_eq!( - store.iter_temporary_state_roots().count(), - block_slot.as_usize() - 1 - ); - }, - "test", - ); + // The block should be rejected, but should store a bunch of temporary states. + harness.set_current_slot(block_slot); + harness.process_block_result(block).await.unwrap_err(); + + assert_eq!( + store.iter_temporary_state_roots().count(), + block_slot.as_usize() - 1 + ); + store + }; + + // Wait until all the references to the store have been dropped, this helps ensure we can + // re-open the store later. + loop { + store = if let Err(store_arc) = Arc::try_unwrap(store) { + sleep(Duration::from_millis(500)).await; + store_arc + } else { + break; + } + } // On startup, the store should garbage collect all the temporary states. let store = get_store(&db_path);