From 406e3921d97f023732fe9c30fe750f63e02bbb9b Mon Sep 17 00:00:00 2001 From: Mac L Date: Tue, 6 Jul 2021 02:38:53 +0000 Subject: [PATCH] Use forwards iterator for state root lookups (#2422) ## Issue Addressed #2377 ## Proposed Changes Implement the same code used for block root lookups (from #2376) to state root lookups in order to improve performance and reduce associated memory spikes (e.g. from certain HTTP API requests). ## Additional Changes - Tests using `rev_iter_state_roots` and `rev_iter_block_roots` have been refactored to use their `forwards` versions instead. - The `rev_iter_state_roots` and `rev_iter_block_roots` functions are now unused and have been removed. - The `state_at_slot` function has been changed to use the `forwards` iterator. ## Additional Info - Some tests still need to be refactored to use their `forwards_iter` versions. These tests start their iteration from a specific beacon state and thus use the `rev_iter_state_roots_from` and `rev_iter_block_roots_from` functions. If they can be refactored, those functions can also be removed. --- beacon_node/beacon_chain/src/beacon_chain.rs | 114 +++++++----- beacon_node/beacon_chain/tests/store_tests.rs | 46 +++-- beacon_node/beacon_chain/tests/tests.rs | 20 +-- beacon_node/http_api/tests/tests.rs | 5 +- beacon_node/store/src/forwards_iter.rs | 170 +++++++++++++++++- beacon_node/store/src/hot_cold_store.rs | 12 +- 6 files changed, 283 insertions(+), 84 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 42ea3652e..331daf321 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -390,29 +390,15 @@ impl BeaconChain { .map(|slot| slot.epoch(T::EthSpec::slots_per_epoch())) } - /// Iterates across all `(block_root, slot)` pairs from the head of the chain (inclusive) to - /// the earliest reachable ancestor (may or may not be genesis). + /// Iterates across all `(block_root, slot)` pairs from `start_slot` + /// to the head of the chain (inclusive). /// /// ## Notes /// - /// `slot` always decreases by `1`. + /// - `slot` always increases by `1`. /// - Skipped slots contain the root of the closest prior - /// non-skipped slot (identical to the way they are stored in `state.block_roots`) . + /// non-skipped slot (identical to the way they are stored in `state.block_roots`). /// - Iterator returns `(Hash256, Slot)`. - /// - As this iterator starts at the `head` of the chain (viz., the best block), the first slot - /// returned may be earlier than the wall-clock slot. - pub fn rev_iter_block_roots( - &self, - ) -> Result>, Error> { - let head = self.head()?; - let iter = BlockRootsIterator::owned(self.store.clone(), head.beacon_state); - Ok( - std::iter::once(Ok((head.beacon_block_root, head.beacon_block.slot()))) - .chain(iter) - .map(|result| result.map_err(|e| e.into())), - ) - } - pub fn forwards_iter_block_roots( &self, start_slot: Slot, @@ -434,7 +420,7 @@ impl BeaconChain { /// /// ## Notes /// - /// `slot` always decreases by `1`. + /// - `slot` always decreases by `1`. /// - Skipped slots contain the root of the closest prior /// non-skipped slot (identical to the way they are stored in `state.block_roots`) . /// - Iterator returns `(Hash256, Slot)`. @@ -526,29 +512,15 @@ impl BeaconChain { }) } - /// Iterates across all `(state_root, slot)` pairs from the head of the chain (inclusive) to - /// the earliest reachable ancestor (may or may not be genesis). + /// Iterates backwards across all `(state_root, slot)` pairs starting from + /// an arbitrary `BeaconState` to the earliest reachable ancestor (may or may not be genesis). /// /// ## Notes /// - /// `slot` always decreases by `1`. + /// - `slot` always decreases by `1`. /// - Iterator returns `(Hash256, Slot)`. /// - As this iterator starts at the `head` of the chain (viz., the best block), the first slot /// returned may be earlier than the wall-clock slot. - pub fn rev_iter_state_roots( - &self, - ) -> Result>, Error> { - let head = self.head()?; - let head_slot = head.beacon_state.slot; - let head_state_root = head.beacon_state_root(); - let iter = StateRootsIterator::owned(self.store.clone(), head.beacon_state); - let iter = std::iter::once(Ok((head_state_root, head_slot))) - .chain(iter) - .map(|result| result.map_err(Into::into)); - Ok(iter) - } - - /// As for `rev_iter_state_roots` but starting from an arbitrary `BeaconState`. pub fn rev_iter_state_roots_from<'a>( &self, state_root: Hash256, @@ -559,6 +531,30 @@ impl BeaconChain { .map(|result| result.map_err(Into::into)) } + /// Iterates across all `(state_root, slot)` pairs from `start_slot` + /// to the head of the chain (inclusive). + /// + /// ## Notes + /// + /// - `slot` always increases by `1`. + /// - Iterator returns `(Hash256, Slot)`. + pub fn forwards_iter_state_roots( + &self, + start_slot: Slot, + ) -> Result>, Error> { + let local_head = self.head()?; + + let iter = HotColdDB::forwards_state_roots_iterator( + self.store.clone(), + start_slot, + local_head.beacon_state_root(), + local_head.beacon_state, + &self.spec, + )?; + + Ok(iter.map(|result| result.map_err(Into::into))) + } + /// Returns the block at the given slot, if any. Only returns blocks in the canonical chain. /// /// Use the `skips` parameter to define the behaviour when `request_slot` is a skipped slot. @@ -580,16 +576,48 @@ impl BeaconChain { } } - /// Returns the block at the given slot, if any. Only returns blocks in the canonical chain. + /// Returns the state root at the given slot, if any. Only returns state roots in the canonical chain. /// /// ## Errors /// /// May return a database error. - pub fn state_root_at_slot(&self, slot: Slot) -> Result, Error> { - process_results(self.rev_iter_state_roots()?, |mut iter| { - iter.find(|(_, this_slot)| *this_slot == slot) - .map(|(root, _)| root) - }) + pub fn state_root_at_slot(&self, request_slot: Slot) -> Result, Error> { + if request_slot > self.slot()? { + return Ok(None); + } else if request_slot == self.spec.genesis_slot { + return Ok(Some(self.genesis_state_root)); + } + + // Try an optimized path of reading the root directly from the head state. + let fast_lookup: Option = self.with_head(|head| { + if head.beacon_block.slot() <= request_slot { + // Return the head state root if all slots between the request and the head are skipped. + Ok(Some(head.beacon_state_root())) + } else if let Ok(root) = head.beacon_state.get_state_root(request_slot) { + // Return the root if it's easily accessible from the head state. + Ok(Some(*root)) + } else { + // Fast lookup is not possible. + Ok::<_, Error>(None) + } + })?; + + if let Some(root) = fast_lookup { + return Ok(Some(root)); + } + + process_results(self.forwards_iter_state_roots(request_slot)?, |mut iter| { + if let Some((root, slot)) = iter.next() { + if slot == request_slot { + Ok(Some(root)) + } else { + // Sanity check. + Err(Error::InconsistentForwardsIter { request_slot, slot }) + } + } else { + Ok(None) + } + })? } /// Returns the block root at the given slot, if any. Only returns roots in the canonical chain. @@ -896,7 +924,7 @@ impl BeaconChain { Ok(state) } Ordering::Less => { - let state_root = process_results(self.rev_iter_state_roots()?, |iter| { + let state_root = process_results(self.forwards_iter_state_roots(slot)?, |iter| { iter.take_while(|(_, current_slot)| *current_slot >= slot) .find(|(_, current_slot)| *current_slot == slot) .map(|(root, _slot)| root) diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index a25663178..73c207238 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -309,7 +309,7 @@ fn epoch_boundary_state_attestation_processing() { let finalized_epoch = harness .chain .head_info() - .expect("head ok") + .expect("should get head") .finalized_checkpoint .epoch; @@ -444,8 +444,8 @@ fn delete_blocks_and_states() { let split_slot = store.get_split_slot(); let finalized_states = harness .chain - .rev_iter_state_roots() - .expect("rev iter ok") + .forwards_iter_state_roots(Slot::new(0)) + .expect("should get iter") .map(Result::unwrap); for (state_root, slot) in finalized_states { @@ -706,7 +706,7 @@ fn check_shuffling_compatible( { let (block_root, slot) = maybe_tuple.unwrap(); // Shuffling is compatible targeting the current epoch, - // iff slot is greater than or equal to the current epoch pivot block + // if slot is greater than or equal to the current epoch pivot block. assert_eq!( harness.chain.shuffling_is_compatible( &block_root, @@ -1671,7 +1671,7 @@ fn pruning_test( let all_canonical_states = harness .chain - .rev_iter_state_roots() + .forwards_iter_state_roots(Slot::new(0)) .unwrap() .map(Result::unwrap) .map(|(state_root, _)| state_root.into()) @@ -1799,17 +1799,12 @@ fn check_chain_dump(harness: &TestHarness, expected_len: u64) { .map(|checkpoint| (checkpoint.beacon_block_root, checkpoint.beacon_block.slot())) .collect::>(); - let head = harness.chain.head().expect("should get head"); - let mut forward_block_roots = HotColdDB::forwards_block_roots_iterator( - harness.chain.store.clone(), - Slot::new(0), - head.beacon_state, - head.beacon_block_root, - &harness.spec, - ) - .unwrap() - .map(Result::unwrap) - .collect::>(); + let mut forward_block_roots = harness + .chain + .forwards_iter_block_roots(Slot::new(0)) + .expect("should get iter") + .map(Result::unwrap) + .collect::>(); // Drop the block roots for skipped slots. forward_block_roots.dedup_by_key(|(block_root, _)| *block_root); @@ -1827,10 +1822,10 @@ fn check_chain_dump(harness: &TestHarness, expected_len: u64) { /// Check that every state from the canonical chain is in the database, and that the /// reverse state and block root iterators reach genesis. fn check_iterators(harness: &TestHarness) { - let mut min_slot = None; + let mut max_slot = None; for (state_root, slot) in harness .chain - .rev_iter_state_roots() + .forwards_iter_state_roots(Slot::new(0)) .expect("should get iter") .map(Result::unwrap) { @@ -1844,20 +1839,23 @@ fn check_iterators(harness: &TestHarness) { "state {:?} from canonical chain should be in DB", state_root ); - min_slot = Some(slot); + max_slot = Some(slot); } - // Assert that we reached genesis. - assert_eq!(min_slot, Some(Slot::new(0))); - // Assert that the block root iterator reaches genesis. + // Assert that we reached the head. + assert_eq!( + max_slot, + Some(harness.chain.head_info().expect("should get head").slot) + ); + // Assert that the block root iterator reaches the head. assert_eq!( harness .chain - .rev_iter_block_roots() + .forwards_iter_block_roots(Slot::new(0)) .expect("should get iter") .last() .map(Result::unwrap) .map(|(_, slot)| slot), - Some(Slot::new(0)) + Some(harness.chain.head_info().expect("should get head").slot) ); } diff --git a/beacon_node/beacon_chain/tests/tests.rs b/beacon_node/beacon_chain/tests/tests.rs index a81663003..c6bf515a3 100644 --- a/beacon_node/beacon_chain/tests/tests.rs +++ b/beacon_node/beacon_chain/tests/tests.rs @@ -77,13 +77,13 @@ fn iterators() { let block_roots: Vec<(Hash256, Slot)> = harness .chain - .rev_iter_block_roots() + .forwards_iter_block_roots(Slot::new(0)) .expect("should get iter") .map(Result::unwrap) .collect(); let state_roots: Vec<(Hash256, Slot)> = harness .chain - .rev_iter_state_roots() + .forwards_iter_state_roots(Slot::new(0)) .expect("should get iter") .map(Result::unwrap) .collect(); @@ -112,30 +112,30 @@ fn iterators() { block_roots.windows(2).for_each(|x| { assert_eq!( x[1].1, - x[0].1 - 1, - "block root slots should be decreasing by one" + x[0].1 + 1, + "block root slots should be increasing by one" ) }); state_roots.windows(2).for_each(|x| { assert_eq!( x[1].1, - x[0].1 - 1, - "state root slots should be decreasing by one" + x[0].1 + 1, + "state root slots should be increasing by one" ) }); let head = &harness.chain.head().expect("should get head"); assert_eq!( - *block_roots.first().expect("should have some block roots"), + *block_roots.last().expect("should have some block roots"), (head.beacon_block_root, head.beacon_block.slot()), - "first block root and slot should be for the head block" + "last block root and slot should be for the head block" ); assert_eq!( - *state_roots.first().expect("should have some state roots"), + *state_roots.last().expect("should have some state roots"), (head.beacon_state_root(), head.beacon_state.slot), - "first state root and slot should be for the head state" + "last state root and slot should be for the head state" ); } diff --git a/beacon_node/http_api/tests/tests.rs b/beacon_node/http_api/tests/tests.rs index fb9e74cec..558fedb06 100644 --- a/beacon_node/http_api/tests/tests.rs +++ b/beacon_node/http_api/tests/tests.rs @@ -861,13 +861,10 @@ impl ApiTester { pub async fn test_beacon_headers_all_parents(self) -> Self { let mut roots = self .chain - .rev_iter_block_roots() + .forwards_iter_block_roots(Slot::new(0)) .unwrap() .map(Result::unwrap) .map(|(root, _slot)| root) - .collect::>() - .into_iter() - .rev() .collect::>(); // The iterator natively returns duplicate roots for skipped slots. diff --git a/beacon_node/store/src/forwards_iter.rs b/beacon_node/store/src/forwards_iter.rs index fb3a0480f..cec57e64e 100644 --- a/beacon_node/store/src/forwards_iter.rs +++ b/beacon_node/store/src/forwards_iter.rs @@ -1,7 +1,7 @@ use crate::chunked_iter::ChunkedVectorIter; -use crate::chunked_vector::BlockRoots; +use crate::chunked_vector::{BlockRoots, StateRoots}; use crate::errors::{Error, Result}; -use crate::iter::BlockRootsIterator; +use crate::iter::{BlockRootsIterator, StateRootsIterator}; use crate::{HotColdDB, ItemStore}; use itertools::process_results; use std::sync::Arc; @@ -172,3 +172,169 @@ impl, Cold: ItemStore> Iterator self.do_next().transpose() } } + +/// Forwards state roots iterator that makes use of the `state_roots` table in the freezer DB. +pub struct FrozenForwardsStateRootsIterator, Cold: ItemStore> { + inner: ChunkedVectorIter, +} + +/// Forwards state roots iterator that reverses a backwards iterator (only good for short ranges). +pub struct SimpleForwardsStateRootsIterator { + // Values from the backwards iterator (in slot descending order) + values: Vec<(Hash256, Slot)>, +} + +/// Fusion of the above two approaches to forwards iteration. Fast and efficient. +pub enum HybridForwardsStateRootsIterator, Cold: ItemStore> { + PreFinalization { + iter: Box>, + /// Data required by the `PostFinalization` iterator when we get to it. + continuation_data: Box, Hash256)>>, + }, + PostFinalization { + iter: SimpleForwardsStateRootsIterator, + }, +} + +impl, Cold: ItemStore> + FrozenForwardsStateRootsIterator +{ + pub fn new( + store: Arc>, + start_slot: Slot, + last_restore_point_slot: Slot, + spec: &ChainSpec, + ) -> Self { + Self { + inner: ChunkedVectorIter::new( + store, + start_slot.as_usize(), + last_restore_point_slot, + spec, + ), + } + } +} + +impl, Cold: ItemStore> Iterator + for FrozenForwardsStateRootsIterator +{ + type Item = (Hash256, Slot); + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|(slot, state_hash)| (state_hash, Slot::from(slot))) + } +} + +impl SimpleForwardsStateRootsIterator { + pub fn new, Cold: ItemStore>( + store: Arc>, + start_slot: Slot, + end_state: BeaconState, + end_state_root: Hash256, + ) -> Result { + // Iterate backwards from the end state, stopping at the start slot. + let values = process_results( + std::iter::once(Ok((end_state_root, end_state.slot))) + .chain(StateRootsIterator::owned(store, end_state)), + |iter| { + iter.take_while(|(_, slot)| *slot >= start_slot) + .collect::>() + }, + )?; + Ok(Self { values }) + } +} + +impl Iterator for SimpleForwardsStateRootsIterator { + type Item = Result<(Hash256, Slot)>; + + fn next(&mut self) -> Option { + // Pop from the end of the vector to get the state roots in slot-ascending order. + Ok(self.values.pop()).transpose() + } +} + +impl, Cold: ItemStore> + HybridForwardsStateRootsIterator +{ + pub fn new( + store: Arc>, + start_slot: Slot, + end_state: BeaconState, + end_state_root: Hash256, + spec: &ChainSpec, + ) -> Result { + use HybridForwardsStateRootsIterator::*; + + let latest_restore_point_slot = store.get_latest_restore_point_slot(); + + let result = if start_slot < latest_restore_point_slot { + PreFinalization { + iter: Box::new(FrozenForwardsStateRootsIterator::new( + store, + start_slot, + latest_restore_point_slot, + spec, + )), + continuation_data: Box::new(Some((end_state, end_state_root))), + } + } else { + PostFinalization { + iter: SimpleForwardsStateRootsIterator::new( + store, + start_slot, + end_state, + end_state_root, + )?, + } + }; + + Ok(result) + } + + fn do_next(&mut self) -> Result> { + use HybridForwardsStateRootsIterator::*; + + match self { + PreFinalization { + iter, + continuation_data, + } => { + match iter.next() { + Some(x) => Ok(Some(x)), + // Once the pre-finalization iterator is consumed, transition + // to a post-finalization iterator beginning from the last slot + // of the pre iterator. + None => { + let (end_state, end_state_root) = + continuation_data.take().ok_or(Error::NoContinuationData)?; + + *self = PostFinalization { + iter: SimpleForwardsStateRootsIterator::new( + iter.inner.store.clone(), + Slot::from(iter.inner.end_vindex), + end_state, + end_state_root, + )?, + }; + self.do_next() + } + } + } + PostFinalization { iter } => iter.next().transpose(), + } + } +} + +impl, Cold: ItemStore> Iterator + for HybridForwardsStateRootsIterator +{ + type Item = Result<(Hash256, Slot)>; + + fn next(&mut self) -> Option { + self.do_next().transpose() + } +} diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 97c954c99..6489301c9 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -2,7 +2,7 @@ use crate::chunked_vector::{ store_updated_vector, BlockRoots, HistoricalRoots, RandaoMixes, StateRoots, }; use crate::config::{OnDiskStoreConfig, StoreConfig}; -use crate::forwards_iter::HybridForwardsBlockRootsIterator; +use crate::forwards_iter::{HybridForwardsBlockRootsIterator, HybridForwardsStateRootsIterator}; use crate::impls::beacon_state::{get_full_state, store_full_state}; use crate::iter::{ParentRootBlockIterator, StateRootsIterator}; use crate::leveldb_store::BytesKey; @@ -393,6 +393,16 @@ impl, Cold: ItemStore> HotColdDB HybridForwardsBlockRootsIterator::new(store, start_slot, end_state, end_block_root, spec) } + pub fn forwards_state_roots_iterator( + store: Arc, + start_slot: Slot, + end_state_root: Hash256, + end_state: BeaconState, + spec: &ChainSpec, + ) -> Result>, Error> { + HybridForwardsStateRootsIterator::new(store, start_slot, end_state, end_state_root, spec) + } + /// Load an epoch boundary state by using the hot state summary look-up. /// /// Will fall back to the cold DB if a hot state summary is not found.