From 229f883968b8f6a9ead66a6aa6ffcff36a496455 Mon Sep 17 00:00:00 2001
From: Michael Sproul <michael@sigmaprime.io>
Date: Wed, 25 May 2022 03:27:30 +0000
Subject: [PATCH] Avoid parallel fork choice runs during sync (#3217)

## Issue Addressed

Fixes an issue that @paulhauner found with the v2.3.0 release candidate whereby the fork choice runs introduced by #3168 tripped over each other during sync:

```
May 24 23:06:40.542 WARN Error signalling fork choice waiter     slot: 3884129, error: ForkChoiceSignalOutOfOrder { current: Slot(3884131), latest: Slot(3884129) }, service: beacon
```

This can occur because fork choice is called from both the state advance timer _and_ the per-slot task. When one of these runs takes a long time it can end up finishing after a run from a later slot, tripping the error above. The problem is resolved by not running either of these fork choice calls during sync.

Additionally, these parallel fork choice runs were causing issues in the database:

```
May 24 07:49:05.098 WARN Found a chain that should already have been pruned, head_slot: 92925, head_block_root: 0xa76c7bf1b98e54ed4b0d8686efcfdf853484e6c2a4c67e91cbf19e5ad1f96b17, service: beacon
May 24 07:49:05.101 WARN Database migration failed               error: HotColdDBError(FreezeSlotError { current_split_slot: Slot(92608), proposed_split_slot: Slot(92576) }), service: beacon
```

In this case, two fork choice calls that each triggered finalization processing completed out of order due to differences in their processing time, causing the background migrator to try to advance finalization _backwards_ :flushed:. Removing the parallel fork choice runs from sync effectively addresses the issue, because runs during sync are the ones most likely to have different finalized checkpoints (because of the speed at which fork choice advances during sync). In theory it's still possible to process updates out of order if any other fork choice runs end up completing out of order, but this should be much less common. Fixing out-of-order fork choice runs in general is difficult as it requires architectural changes like serialising fork choice updates through a single thread, or locking fork choice along with the head when it is mutated (https://github.com/sigp/lighthouse/pull/3175).

## Proposed Changes

* Don't run per-slot fork choice during sync (if the head is more than 4 slots behind the current slot)
* Don't run state-advance fork choice during sync (if the head is more than 4 slots behind the current slot)
* Check for monotonic finalization updates in the background migrator. This is a good defensive check to have, and I'm not sure why we didn't have it before (we may have had it and wrongly removed it).
---
 beacon_node/beacon_chain/src/beacon_chain.rs  | 21 ++++++++--
 beacon_node/beacon_chain/src/migrate.rs       | 41 +++++++++++++++++--
 .../beacon_chain/src/state_advance_timer.rs   | 10 +++++
 3 files changed, 65 insertions(+), 7 deletions(-)
diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs
index a16446019..5d2b35727 100644
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@@ -123,6 +123,12 @@ const EARLY_ATTESTER_CACHE_HISTORIC_SLOTS: u64 = 4;
 /// If the head block is older than this value, don't bother preparing beacon proposers.
 const PREPARE_PROPOSER_HISTORIC_EPOCHS: u64 = 4;
 
+/// If the head is more than `MAX_PER_SLOT_FORK_CHOICE_DISTANCE` slots behind the wall-clock slot, DO NOT
+/// run the heavy-weight per-slot tasks (primarily fork choice); light-weight pruning still runs.
+///
+/// This prevents unnecessary work during sync.
+const MAX_PER_SLOT_FORK_CHOICE_DISTANCE: u64 = 4;
+
 /// Reported to the user when the justified block has an invalid execution payload.
 pub const INVALID_JUSTIFIED_PAYLOAD_SHUTDOWN_REASON: &str =
     "Justified block has an invalid execution payload.";
@@ -4412,6 +4418,18 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
     pub fn per_slot_task(self: &Arc<Self>) {
         trace!(self.log, "Running beacon chain per slot tasks");
         if let Some(slot) = self.slot_clock.now() {
+            // Always run the light-weight pruning tasks (these structures should be empty during
+            // sync anyway).
+            self.naive_aggregation_pool.write().prune(slot);
+            self.block_times_cache.write().prune(slot);
+
+            // Don't run heavy-weight tasks during sync.
+            if self.best_slot().map_or(true, |head_slot| {
+                head_slot + MAX_PER_SLOT_FORK_CHOICE_DISTANCE < slot
+            }) {
+                return;
+            }
+
             // Run fork choice and signal to any waiting task that it has completed.
             if let Err(e) = self.fork_choice() {
                 error!(
@@ -4434,9 +4452,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
                     );
                 }
             }
-
-            self.naive_aggregation_pool.write().prune(slot);
-            self.block_times_cache.write().prune(slot);
         }
     }
 
diff --git a/beacon_node/beacon_chain/src/migrate.rs b/beacon_node/beacon_chain/src/migrate.rs
index 2c2ce0aa1..1c0d9c4ed 100644
--- a/beacon_node/beacon_chain/src/migrate.rs
+++ b/beacon_node/beacon_chain/src/migrate.rs
@@ -55,7 +55,13 @@ pub enum PruningOutcome {
     Successful {
         old_finalized_checkpoint: Checkpoint,
     },
-    DeferredConcurrentMutation,
+    /// The run was aborted because the new finalized checkpoint is older than the previous one.
+    OutOfOrderFinalization {
+        old_finalized_checkpoint: Checkpoint,
+        new_finalized_checkpoint: Checkpoint,
+    },
+    /// The run was aborted due to a concurrent mutation of the head tracker.
+    DeferredConcurrentHeadTrackerMutation,
 }
 
 /// Logic errors that can occur during pruning, none of these should ever happen.
@@ -68,6 +74,10 @@ pub enum PruningError {
     MissingInfoForCanonicalChain {
         slot: Slot,
     },
+    FinalizedStateOutOfOrder {
+        old_finalized_checkpoint: Checkpoint,
+        new_finalized_checkpoint: Checkpoint,
+    },
     UnexpectedEqualStateRoots,
     UnexpectedUnequalStateRoots,
 }
@@ -223,7 +233,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
             Ok(PruningOutcome::Successful {
                 old_finalized_checkpoint,
             }) => old_finalized_checkpoint,
-            Ok(PruningOutcome::DeferredConcurrentMutation) => {
+            Ok(PruningOutcome::DeferredConcurrentHeadTrackerMutation) => {
                 warn!(
                     log,
                     "Pruning deferred because of a concurrent mutation";
@@ -231,8 +241,21 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
                 );
                 return;
             }
+            Ok(PruningOutcome::OutOfOrderFinalization {
+                old_finalized_checkpoint,
+                new_finalized_checkpoint,
+            }) => {
+                warn!(
+                    log,
+                    "Ignoring out of order finalization request";
+                    "old_finalized_epoch" => old_finalized_checkpoint.epoch,
+                    "new_finalized_epoch" => new_finalized_checkpoint.epoch,
+                    "message" => "this is expected occasionally due to a (harmless) race condition"
+                );
+                return;
+            }
             Err(e) => {
-                warn!(log, "Block pruning failed"; "error" => format!("{:?}", e));
+                warn!(log, "Block pruning failed"; "error" => ?e);
                 return;
             }
         };
@@ -347,6 +370,16 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
             .into());
         }
 
+        // The new finalized state must not be older than the previous finalized state.
+        // I think a violation can happen sometimes currently due to `fork_choice` running in
+        // parallel with itself and sending us notifications out of order.
+        if old_finalized_slot > new_finalized_slot {
+            return Ok(PruningOutcome::OutOfOrderFinalization {
+                old_finalized_checkpoint,
+                new_finalized_checkpoint,
+            });
+        }
+
         debug!(
             log,
             "Starting database pruning";
@@ -523,7 +556,7 @@ impl<E: EthSpec, Hot: ItemStore<E>, Cold: ItemStore<E>> BackgroundMigrator<E, Ho
         // later.
         for head_hash in &abandoned_heads {
             if !head_tracker_lock.contains_key(head_hash) {
-                return Ok(PruningOutcome::DeferredConcurrentMutation);
+                return Ok(PruningOutcome::DeferredConcurrentHeadTrackerMutation);
             }
         }
 
diff --git a/beacon_node/beacon_chain/src/state_advance_timer.rs b/beacon_node/beacon_chain/src/state_advance_timer.rs
index 7216ac111..030507a83 100644
--- a/beacon_node/beacon_chain/src/state_advance_timer.rs
+++ b/beacon_node/beacon_chain/src/state_advance_timer.rs
@@ -37,6 +37,9 @@ use types::{AttestationShufflingId, EthSpec, Hash256, RelativeEpoch, Slot};
 /// for some period of time.
 const MAX_ADVANCE_DISTANCE: u64 = 4;
 
+/// Skip the fork choice lookahead if the head is more than this many slots behind (i.e. syncing).
+const MAX_FORK_CHOICE_DISTANCE: u64 = 4;
+
 #[derive(Debug)]
 enum Error {
     BeaconChain(BeaconChainError),
@@ -212,6 +215,13 @@ async fn state_advance_timer<T: BeaconChainTypes>(
         let next_slot = current_slot + 1;
         executor.spawn_blocking(
             move || {
+                // Don't run fork choice during sync.
+                if beacon_chain.best_slot().map_or(true, |head_slot| {
+                    head_slot + MAX_FORK_CHOICE_DISTANCE < current_slot
+                }) {
+                    return;
+                }
+
                 if let Err(e) = beacon_chain.fork_choice_at_slot(next_slot) {
                     warn!(
                         log,