From 6d8d212da84a232ec9711be5e18c13d93881eb93 Mon Sep 17 00:00:00 2001
From: int88
Date: Fri, 5 May 2023 00:51:57 +0000
Subject: [PATCH] use state cache to optimise historical state lookup (#4228)

## Issue Addressed

#3873

## Proposed Changes

Add a cache to optimise historical state lookup.

## Additional Info

N/A

Co-authored-by: Michael Sproul
---
 beacon_node/src/cli.rs                  |  7 ++++
 beacon_node/src/config.rs               |  6 +++
 beacon_node/store/src/config.rs         |  4 ++
 beacon_node/store/src/hot_cold_store.rs | 50 +++++++++++++++++++++----
 book/src/advanced_database.md           | 10 +++++
 book/src/api-lighthouse.md              |  1 +
 book/src/database-migrations.md         |  1 +
 lighthouse/tests/beacon_node.rs         | 19 ++++++
 8 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs
index a578ac7ea..61a11f88e 100644
--- a/beacon_node/src/cli.rs
+++ b/beacon_node/src/cli.rs
@@ -519,6 +519,13 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
                 .help("Specifies how many blocks the database should cache in memory [default: 5]")
                 .takes_value(true)
         )
+        .arg(
+            Arg::with_name("historic-state-cache-size")
+                .long("historic-state-cache-size")
+                .value_name("SIZE")
+                .help("Specifies how many states from the freezer database should be cached in memory [default: 1]")
+                .takes_value(true)
+        )
         /*
          * Execution Layer Integration
          */
diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs
index 9b1b9ad1a..64cfb0fed 100644
--- a/beacon_node/src/config.rs
+++ b/beacon_node/src/config.rs
@@ -383,6 +383,12 @@ pub fn get_config(
             .map_err(|_| "block-cache-size is not a valid integer".to_string())?;
     }
 
+    if let Some(historic_state_cache_size) = cli_args.value_of("historic-state-cache-size") {
+        client_config.store.historic_state_cache_size = historic_state_cache_size
+            .parse()
+            .map_err(|_| "historic-state-cache-size is not a valid integer".to_string())?;
+    }
+
     client_config.store.compact_on_init = cli_args.is_present("compact-db");
 
     if let Some(compact_on_prune) = cli_args.value_of("auto-compact-db") {
         client_config.store.compact_on_prune = compact_on_prune
diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs
index 027b8152e..581003b4f 100644
--- a/beacon_node/store/src/config.rs
+++ b/beacon_node/store/src/config.rs
@@ -7,6 +7,7 @@ use types::{EthSpec, MinimalEthSpec};
 pub const PREV_DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 2048;
 pub const DEFAULT_SLOTS_PER_RESTORE_POINT: u64 = 8192;
 pub const DEFAULT_BLOCK_CACHE_SIZE: usize = 5;
+pub const DEFAULT_HISTORIC_STATE_CACHE_SIZE: usize = 1;
 
 /// Database configuration parameters.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -17,6 +18,8 @@ pub struct StoreConfig {
     pub slots_per_restore_point_set_explicitly: bool,
     /// Maximum number of blocks to store in the in-memory block cache.
    pub block_cache_size: usize,
+    /// Maximum number of states from the freezer database to store in the in-memory state cache.
+    pub historic_state_cache_size: usize,
     /// Whether to compact the database on initialization.
     pub compact_on_init: bool,
     /// Whether to compact the database during database pruning.
@@ -43,6 +46,7 @@ impl Default for StoreConfig {
             slots_per_restore_point: MinimalEthSpec::slots_per_historical_root() as u64,
             slots_per_restore_point_set_explicitly: false,
             block_cache_size: DEFAULT_BLOCK_CACHE_SIZE,
+            historic_state_cache_size: DEFAULT_HISTORIC_STATE_CACHE_SIZE,
             compact_on_init: false,
             compact_on_prune: true,
             prune_payloads: true,
diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs
index 02608f9a0..70fb22742 100644
--- a/beacon_node/store/src/hot_cold_store.rs
+++ b/beacon_node/store/src/hot_cold_store.rs
@@ -62,6 +62,8 @@ pub struct HotColdDB<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> {
     pub hot_db: Hot,
     /// LRU cache of deserialized blocks. Updated whenever a block is loaded.
     block_cache: Mutex<LruCache<Hash256, SignedBeaconBlock<E>>>,
+    /// LRU cache of replayed states.
+    state_cache: Mutex<LruCache<Slot, BeaconState<E>>>,
     /// Chain spec.
     pub(crate) spec: ChainSpec,
     /// Logger.
@@ -129,6 +131,7 @@ impl<E: EthSpec> HotColdDB<E, MemoryStore<E>, MemoryStore<E>> {
             cold_db: MemoryStore::open(),
             hot_db: MemoryStore::open(),
             block_cache: Mutex::new(LruCache::new(config.block_cache_size)),
+            state_cache: Mutex::new(LruCache::new(config.historic_state_cache_size)),
             config,
             spec,
             log,
@@ -162,6 +165,7 @@ impl<E: EthSpec> HotColdDB<E, LevelDB<E>, LevelDB<E>> {
             cold_db: LevelDB::open(cold_path)?,
             hot_db: LevelDB::open(hot_path)?,
             block_cache: Mutex::new(LruCache::new(config.block_cache_size)),
+            state_cache: Mutex::new(LruCache::new(config.historic_state_cache_size)),
             config,
             spec,
             log,
@@ -977,40 +981,70 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> HotColdDB<E, Hot, Cold>
     /// Load a frozen state that lies between restore points.
     fn load_cold_intermediate_state(&self, slot: Slot) -> Result<BeaconState<E>, Error> {
+        if let Some(state) = self.state_cache.lock().get(&slot) {
+            return Ok(state.clone());
+        }
+
         // 1. Load the restore points either side of the intermediate state.
         let low_restore_point_idx = slot.as_u64() / self.config.slots_per_restore_point;
         let high_restore_point_idx = low_restore_point_idx + 1;
 
+        // Use the low restore point as the base state.
+        let mut low_slot: Slot =
+            Slot::new(low_restore_point_idx * self.config.slots_per_restore_point);
+        let mut low_state: Option<BeaconState<E>> = None;
+
+        // Try to get a more recent state from the cache to avoid replaying a large number of blocks.
+        for (s, state) in self.state_cache.lock().iter() {
+            if s.as_u64() / self.config.slots_per_restore_point == low_restore_point_idx
+                && *s < slot
+                && low_slot < *s
+            {
+                low_slot = *s;
+                low_state = Some(state.clone());
+            }
+        }
+
+        // If low_state is still None, use load_restore_point_by_index to load the state.
+        let low_state = match low_state {
+            Some(state) => state,
+            None => self.load_restore_point_by_index(low_restore_point_idx)?,
+        };
+
         // Acquire the read lock, so that the split can't change while this is happening.
         let split = self.split.read_recursive();
 
-        let low_restore_point = self.load_restore_point_by_index(low_restore_point_idx)?;
         let high_restore_point = self.get_restore_point(high_restore_point_idx, &split)?;
 
-        // 2. Load the blocks from the high restore point back to the low restore point.
+        // 2. Load the blocks from the high restore point back to the low point.
         let blocks = self.load_blocks_to_replay(
-            low_restore_point.slot(),
+            low_slot,
             slot,
             self.get_high_restore_point_block_root(&high_restore_point, slot)?,
         )?;
 
-        // 3. Replay the blocks on top of the low restore point.
+        // 3. Replay the blocks on top of the low point.
         // Use a forwards state root iterator to avoid doing any tree hashing.
         // The state root of the high restore point should never be used, so is safely set to 0.
         let state_root_iter = self.forwards_state_roots_iterator_until(
-            low_restore_point.slot(),
+            low_slot,
             slot,
             || (high_restore_point, Hash256::zero()),
             &self.spec,
         )?;
 
-        self.replay_blocks(
-            low_restore_point,
+        let state = self.replay_blocks(
+            low_state,
             blocks,
             slot,
             Some(state_root_iter),
             StateRootStrategy::Accurate,
-        )
+        )?;
+
+        // Replaying the blocks succeeded, so cache the resulting state.
+        self.state_cache.lock().put(slot, state.clone());
+
+        Ok(state)
     }
 
     /// Get the restore point with the given index, or if it is out of bounds, the split state.
diff --git a/book/src/advanced_database.md b/book/src/advanced_database.md
index 397d9a28b..57e49531c 100644
--- a/book/src/advanced_database.md
+++ b/book/src/advanced_database.md
@@ -58,6 +58,16 @@ the `--slots-per-restore-point` flag:
 lighthouse beacon_node --slots-per-restore-point 32
 ```
 
+### Historic state cache
+
+Lighthouse includes a cache to avoid repeatedly replaying blocks when loading historic states. Lighthouse will cache a limited number of reconstructed states and re-use them when serving requests for subsequent states at higher slots. This greatly reduces the cost of requesting several states in slot order, and we recommend that applications like block explorers take advantage of this cache.
+
+The size of the historic state cache can be set with the `--historic-state-cache-size` flag (the default is 1):
+
+```bash
+lighthouse beacon_node --historic-state-cache-size 4
+```
+
 ## Glossary
 
 * _Freezer DB_: part of the database storing finalized states. States are stored in a sparser
diff --git a/book/src/api-lighthouse.md b/book/src/api-lighthouse.md
index 1fa7a1397..e67a79c8f 100644
--- a/book/src/api-lighthouse.md
+++ b/book/src/api-lighthouse.md
@@ -456,6 +456,7 @@ curl "http://localhost:5052/lighthouse/database/info" | jq
     "config": {
         "slots_per_restore_point": 2048,
         "block_cache_size": 5,
+        "historic_state_cache_size": 1,
         "compact_on_init": false,
         "compact_on_prune": true
     },
diff --git a/book/src/database-migrations.md b/book/src/database-migrations.md
index d2b7b518d..5e0b89635 100644
--- a/book/src/database-migrations.md
+++ b/book/src/database-migrations.md
@@ -92,6 +92,7 @@ curl "http://localhost:5052/lighthouse/database/info"
     "slots_per_restore_point": 8192,
     "slots_per_restore_point_set_explicitly": true,
     "block_cache_size": 5,
+    "historic_state_cache_size": 1,
     "compact_on_init": false,
     "compact_on_prune": true
   }
diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs
index 9dd67eadc..087606111 100644
--- a/lighthouse/tests/beacon_node.rs
+++ b/lighthouse/tests/beacon_node.rs
@@ -1669,6 +1669,25 @@ fn block_cache_size_flag() {
         .with_config(|config| assert_eq!(config.store.block_cache_size, 4_usize));
 }
 #[test]
+fn historic_state_cache_size_flag() {
+    CommandLineTest::new()
+        .flag("historic-state-cache-size", Some("4"))
+        .run_with_zero_port()
+        .with_config(|config| assert_eq!(config.store.historic_state_cache_size, 4_usize));
+}
+#[test]
+fn historic_state_cache_size_default() {
+    use beacon_node::beacon_chain::store::config::DEFAULT_HISTORIC_STATE_CACHE_SIZE;
+    CommandLineTest::new()
+        .run_with_zero_port()
+        .with_config(|config| {
+            assert_eq!(
+                config.store.historic_state_cache_size,
+                DEFAULT_HISTORIC_STATE_CACHE_SIZE
+            );
+        });
+}
+#[test]
 fn auto_compact_db_flag() {
     CommandLineTest::new()
         .flag("auto-compact-db", Some("false"))
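
For readers skimming the patch, the sketch below restates the lookup strategy that `load_cold_intermediate_state` gains here, in a self-contained form. It is a simplified illustration, not Lighthouse code: `HistoricStateCache`, `best_replay_base`, the `u64` stand-ins for `Slot` and `BeaconState`, and the `BTreeMap`-based eviction are assumptions made for this example; the real implementation keeps an `lru::LruCache` behind a `Mutex` and replays blocks via `replay_blocks`, as shown in the diff above.

```rust
use std::collections::BTreeMap;

/// Stand-ins for Lighthouse's `Slot` and `BeaconState` types (assumptions for
/// illustration only; the real types live in the `types` crate).
type Slot = u64;

#[derive(Clone, Debug)]
struct BeaconState {
    slot: Slot,
}

/// Mirrors `DEFAULT_SLOTS_PER_RESTORE_POINT` from the patch.
const SLOTS_PER_RESTORE_POINT: u64 = 8192;

/// Hypothetical cache sketching the strategy of `load_cold_intermediate_state`:
/// exact hits are returned directly, and on a miss the most recent cached state
/// in the same restore-point interval (below the requested slot) is preferred as
/// the replay base, shortening block replay.
struct HistoricStateCache {
    states: BTreeMap<Slot, BeaconState>,
    capacity: usize,
}

impl HistoricStateCache {
    fn new(capacity: usize) -> Self {
        Self {
            states: BTreeMap::new(),
            capacity,
        }
    }

    /// Exact-slot hit, analogous to the `state_cache.lock().get(&slot)` fast path.
    fn get_exact(&self, slot: Slot) -> Option<BeaconState> {
        self.states.get(&slot).cloned()
    }

    /// Best slot to replay from: the highest cached slot below `slot` that lies in
    /// the same restore-point interval, falling back to the restore point itself.
    fn best_replay_base(&self, slot: Slot) -> Slot {
        let low_restore_point_slot = (slot / SLOTS_PER_RESTORE_POINT) * SLOTS_PER_RESTORE_POINT;
        self.states
            .range(low_restore_point_slot..slot)
            .next_back()
            .map(|(cached_slot, _)| *cached_slot)
            .unwrap_or(low_restore_point_slot)
    }

    /// Insert a replayed state, evicting the lowest cached slot when over capacity
    /// (a simplification of the LRU eviction used in the real patch).
    fn put(&mut self, slot: Slot, state: BeaconState) {
        self.states.insert(slot, state);
        if self.states.len() > self.capacity {
            let oldest = self.states.keys().next().copied();
            if let Some(oldest) = oldest {
                self.states.remove(&oldest);
            }
        }
    }
}

fn main() {
    let mut cache = HistoricStateCache::new(4);

    // Pretend we just replayed and cached the state at slot 8200.
    cache.put(8200, BeaconState { slot: 8200 });
    let cached = cache.get_exact(8200).expect("state at slot 8200 was cached");
    println!("cached base state is at slot {}", cached.slot);

    // A later request for slot 8300 misses the cache, but can replay from slot 8200
    // instead of the restore point at slot 8192, saving 8 slots' worth of replay.
    assert!(cache.get_exact(8300).is_none());
    assert_eq!(cache.best_replay_base(8300), 8200);
}
```

The key design point is the second lookup path: even on a cache miss, any cached state that sits between the low restore point and the requested slot shortens the block replay, which is why requesting states in ascending slot order becomes cheap once the cache is warm.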