Add more checks and logging before genesis (#4730)

## Proposed Changes This PR adds more logging prior to genesis, particularly on networks that start with execution enabled. There are new checks using `eth_getBlockByHash/Number` to verify that the genesis state's `latest_execution_payload_header` matches the execution node's genesis block. The first commit also runs the merge-readiness/Capella-readiness checks prior to genesis. This has two effects: - Give more information on the execution node's status and its readiness for genesis. - Prevent the `el_offline` status from being set on `/eth/v1/node/syncing`, which previously caused the VC to complain loudly. I would like to include this for the Holesky reboot. It would have caught the misconfig that doomed the first Holesky. ## Additional Info - Geth doesn't serve payload bodies prior to genesis, which is why we use the legacy methods. I haven't checked with other ELs yet. - Currently this is logging errors with _Capella_ genesis states generated by `ethereum-genesis-generator` because the `withdrawals_root` is not set correctly (it is 0x0). This is not a blocker for Holesky, as it starts from Bellatrix (Pari is investigating).
2023-09-21 00:26:53 +00:00 · 2023-09-21 00:26:53 +00:00 · 5a35278aea
commit 5a35278aea
parent 1e9925435e
6 changed files with 209 additions and 10 deletions
--- a/beacon_node/beacon_chain/src/errors.rs
+++ b/beacon_node/beacon_chain/src/errors.rs
@ -146,6 +146,8 @@ pub enum BeaconChainError {
    BlockVariantLacksExecutionPayload(Hash256),
    ExecutionLayerErrorPayloadReconstruction(ExecutionBlockHash, Box<execution_layer::Error>),
    EngineGetCapabilititesFailed(Box<execution_layer::Error>),
+    ExecutionLayerGetBlockByNumberFailed(Box<execution_layer::Error>),
+    ExecutionLayerGetBlockByHashFailed(Box<execution_layer::Error>),
    BlockHashMissingFromExecutionLayer(ExecutionBlockHash),
    InconsistentPayloadReconstructed {
        slot: Slot,
--- a/beacon_node/beacon_chain/src/merge_readiness.rs
+++ b/beacon_node/beacon_chain/src/merge_readiness.rs
@ -1,8 +1,10 @@
 //! Provides tools for checking if a node is ready for the Bellatrix upgrade and following merge
 //! transition.

-use crate::{BeaconChain, BeaconChainTypes};
+use crate::{BeaconChain, BeaconChainError as Error, BeaconChainTypes};
+use execution_layer::BlockByNumberQuery;
 use serde::{Deserialize, Serialize, Serializer};
+use slog::debug;
 use std::fmt;
 use std::fmt::Write;
 use types::*;
@ -120,6 +122,25 @@ impl fmt::Display for MergeReadiness {
    }
 }

+pub enum GenesisExecutionPayloadStatus {
+    Correct(ExecutionBlockHash),
+    BlockHashMismatch {
+        got: ExecutionBlockHash,
+        expected: ExecutionBlockHash,
+    },
+    TransactionsRootMismatch {
+        got: Hash256,
+        expected: Hash256,
+    },
+    WithdrawalsRootMismatch {
+        got: Hash256,
+        expected: Hash256,
+    },
+    OtherMismatch,
+    Irrelevant,
+    AlreadyHappened,
+}
+
 impl<T: BeaconChainTypes> BeaconChain<T> {
    /// Returns `true` if user has an EL configured, or if the Bellatrix fork has occurred or will
    /// occur within `MERGE_READINESS_PREPARATION_SECONDS`.
@ -144,9 +165,9 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
    }

    /// Attempts to connect to the EL and confirm that it is ready for the merge.
-    pub async fn check_merge_readiness(&self) -> MergeReadiness {
+    pub async fn check_merge_readiness(&self, current_slot: Slot) -> MergeReadiness {
        if let Some(el) = self.execution_layer.as_ref() {
-            if !el.is_synced_for_notifier().await {
+            if !el.is_synced_for_notifier(current_slot).await {
                // The EL is not synced.
                return MergeReadiness::NotSynced;
            }
@ -161,6 +182,91 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
            MergeReadiness::NoExecutionEndpoint
        }
    }
+
+    /// Check that the execution payload embedded in the genesis state matches the EL's genesis
+    /// block.
+    pub async fn check_genesis_execution_payload_is_correct(
+        &self,
+    ) -> Result<GenesisExecutionPayloadStatus, Error> {
+        let head_snapshot = self.head_snapshot();
+        let genesis_state = &head_snapshot.beacon_state;
+
+        if genesis_state.slot() != 0 {
+            return Ok(GenesisExecutionPayloadStatus::AlreadyHappened);
+        }
+
+        let Ok(latest_execution_payload_header) = genesis_state.latest_execution_payload_header()
+        else {
+            return Ok(GenesisExecutionPayloadStatus::Irrelevant);
+        };
+        let fork = self.spec.fork_name_at_epoch(Epoch::new(0));
+
+        let execution_layer = self
+            .execution_layer
+            .as_ref()
+            .ok_or(Error::ExecutionLayerMissing)?;
+        let exec_block_hash = latest_execution_payload_header.block_hash();
+
+        // Use getBlockByNumber(0) to check that the block hash matches.
+        // At present, Geth does not respond to engine_getPayloadBodiesByRange before genesis.
+        let execution_block = execution_layer
+            .get_block_by_number(BlockByNumberQuery::Tag("0x0"))
+            .await
+            .map_err(|e| Error::ExecutionLayerGetBlockByNumberFailed(Box::new(e)))?
+            .ok_or(Error::BlockHashMissingFromExecutionLayer(exec_block_hash))?;
+
+        if execution_block.block_hash != exec_block_hash {
+            return Ok(GenesisExecutionPayloadStatus::BlockHashMismatch {
+                got: execution_block.block_hash,
+                expected: exec_block_hash,
+            });
+        }
+
+        // Double-check the block by reconstructing it.
+        let execution_payload = execution_layer
+            .get_payload_by_hash_legacy(exec_block_hash, fork)
+            .await
+            .map_err(|e| Error::ExecutionLayerGetBlockByHashFailed(Box::new(e)))?
+            .ok_or(Error::BlockHashMissingFromExecutionLayer(exec_block_hash))?;
+
+        // Verify payload integrity.
+        let header_from_payload = ExecutionPayloadHeader::from(execution_payload.to_ref());
+
+        let got_transactions_root = header_from_payload.transactions_root();
+        let expected_transactions_root = latest_execution_payload_header.transactions_root();
+        let got_withdrawals_root = header_from_payload.withdrawals_root().ok();
+        let expected_withdrawals_root = latest_execution_payload_header.withdrawals_root().ok();
+
+        if got_transactions_root != expected_transactions_root {
+            return Ok(GenesisExecutionPayloadStatus::TransactionsRootMismatch {
+                got: got_transactions_root,
+                expected: expected_transactions_root,
+            });
+        }
+
+        if let Some(&expected) = expected_withdrawals_root {
+            if let Some(&got) = got_withdrawals_root {
+                if got != expected {
+                    return Ok(GenesisExecutionPayloadStatus::WithdrawalsRootMismatch {
+                        got,
+                        expected,
+                    });
+                }
+            }
+        }
+
+        if header_from_payload.to_ref() != latest_execution_payload_header {
+            debug!(
+                self.log,
+                "Genesis execution payload reconstruction failure";
+                "consensus_node_header" => ?latest_execution_payload_header,
+                "execution_node_header" => ?header_from_payload
+            );
+            return Ok(GenesisExecutionPayloadStatus::OtherMismatch);
+        }
+
+        Ok(GenesisExecutionPayloadStatus::Correct(exec_block_hash))
+    }
 }

 /// Utility function to serialize a Uint256 as a decimal string.
--- a/beacon_node/client/src/notifier.rs
+++ b/beacon_node/client/src/notifier.rs
@ -1,7 +1,7 @@
 use crate::metrics;
 use beacon_chain::{
    capella_readiness::CapellaReadiness,
-    merge_readiness::{MergeConfig, MergeReadiness},
+    merge_readiness::{GenesisExecutionPayloadStatus, MergeConfig, MergeReadiness},
    BeaconChain, BeaconChainTypes, ExecutionStatus,
 };
 use lighthouse_network::{types::SyncState, NetworkGlobals};
@ -62,6 +62,9 @@ pub fn spawn_notifier<T: BeaconChainTypes>(
                        "wait_time" => estimated_time_pretty(Some(next_slot.as_secs() as f64)),
                    );
                    eth1_logging(&beacon_chain, &log);
+                    merge_readiness_logging(Slot::new(0), &beacon_chain, &log).await;
+                    capella_readiness_logging(Slot::new(0), &beacon_chain, &log).await;
+                    genesis_execution_payload_logging(&beacon_chain, &log).await;
                    sleep(slot_duration).await;
                }
                _ => break,
@ -365,7 +368,7 @@ async fn merge_readiness_logging<T: BeaconChainTypes>(
        return;
    }

-    match beacon_chain.check_merge_readiness().await {
+    match beacon_chain.check_merge_readiness(current_slot).await {
        MergeReadiness::Ready {
            config,
            current_difficulty,
@ -476,6 +479,79 @@ async fn capella_readiness_logging<T: BeaconChainTypes>(
    }
 }

+async fn genesis_execution_payload_logging<T: BeaconChainTypes>(
+    beacon_chain: &BeaconChain<T>,
+    log: &Logger,
+) {
+    match beacon_chain
+        .check_genesis_execution_payload_is_correct()
+        .await
+    {
+        Ok(GenesisExecutionPayloadStatus::Correct(block_hash)) => {
+            info!(
+                log,
+                "Execution enabled from genesis";
+                "genesis_payload_block_hash" => ?block_hash,
+            );
+        }
+        Ok(GenesisExecutionPayloadStatus::BlockHashMismatch { got, expected }) => {
+            error!(
+                log,
+                "Genesis payload block hash mismatch";
+                "info" => "genesis is misconfigured and likely to fail",
+                "consensus_node_block_hash" => ?expected,
+                "execution_node_block_hash" => ?got,
+            );
+        }
+        Ok(GenesisExecutionPayloadStatus::TransactionsRootMismatch { got, expected }) => {
+            error!(
+                log,
+                "Genesis payload transactions root mismatch";
+                "info" => "genesis is misconfigured and likely to fail",
+                "consensus_node_transactions_root" => ?expected,
+                "execution_node_transactions_root" => ?got,
+            );
+        }
+        Ok(GenesisExecutionPayloadStatus::WithdrawalsRootMismatch { got, expected }) => {
+            error!(
+                log,
+                "Genesis payload withdrawals root mismatch";
+                "info" => "genesis is misconfigured and likely to fail",
+                "consensus_node_withdrawals_root" => ?expected,
+                "execution_node_withdrawals_root" => ?got,
+            );
+        }
+        Ok(GenesisExecutionPayloadStatus::OtherMismatch) => {
+            error!(
+                log,
+                "Genesis payload header mismatch";
+                "info" => "genesis is misconfigured and likely to fail",
+                "detail" => "see debug logs for payload headers"
+            );
+        }
+        Ok(GenesisExecutionPayloadStatus::Irrelevant) => {
+            info!(
+                log,
+                "Execution is not enabled from genesis";
+            );
+        }
+        Ok(GenesisExecutionPayloadStatus::AlreadyHappened) => {
+            warn!(
+                log,
+                "Unable to check genesis which has already occurred";
+                "info" => "this is probably a race condition or a bug"
+            );
+        }
+        Err(e) => {
+            error!(
+                log,
+                "Unable to check genesis execution payload";
+                "error" => ?e
+            );
+        }
+    }
+}
+
 fn eth1_logging<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>, log: &Logger) {
    let current_slot_opt = beacon_chain.slot().ok();

--- a/beacon_node/execution_layer/src/lib.rs
+++ b/beacon_node/execution_layer/src/lib.rs
@ -520,9 +520,9 @@ impl<T: EthSpec> ExecutionLayer<T> {
    ///
    /// This function is a wrapper over `Self::is_synced` that makes an additional
    /// check for the execution layer sync status. Checks if the latest block has
-    /// a `block_number != 0`.
+    /// a `block_number != 0` *if* the `current_slot` is also `> 0`.
    /// Returns the `Self::is_synced` response if unable to get latest block.
-    pub async fn is_synced_for_notifier(&self) -> bool {
+    pub async fn is_synced_for_notifier(&self, current_slot: Slot) -> bool {
        let synced = self.is_synced().await;
        if synced {
            if let Ok(Some(block)) = self
@ -531,7 +531,7 @@ impl<T: EthSpec> ExecutionLayer<T> {
                .get_block_by_number(BlockByNumberQuery::Tag(LATEST_TAG))
                .await
            {
-                if block.block_number == 0 {
+                if block.block_number == 0 && current_slot > 0 {
                    return false;
                }
            }
@ -1605,6 +1605,17 @@ impl<T: EthSpec> ExecutionLayer<T> {
        }
    }

+    pub async fn get_block_by_number(
+        &self,
+        query: BlockByNumberQuery<'_>,
+    ) -> Result<Option<ExecutionBlock>, Error> {
+        self.engine()
+            .request(|engine| async move { engine.api.get_block_by_number(query).await })
+            .await
+            .map_err(Box::new)
+            .map_err(Error::EngineError)
+    }
+
    pub async fn get_payload_by_hash_legacy(
        &self,
        hash: ExecutionBlockHash,
--- a/beacon_node/http_api/src/lib.rs
+++ b/beacon_node/http_api/src/lib.rs
@ -4327,7 +4327,8 @@ pub fn serve<T: BeaconChainTypes>(
        .then(
            |task_spawner: TaskSpawner<T::EthSpec>, chain: Arc<BeaconChain<T>>| {
                task_spawner.spawn_async_with_rejection(Priority::P1, async move {
-                    let merge_readiness = chain.check_merge_readiness().await;
+                    let current_slot = chain.slot_clock.now_or_genesis().unwrap_or(Slot::new(0));
+                    let merge_readiness = chain.check_merge_readiness(current_slot).await;
                    Ok::<_, warp::reject::Rejection>(
                        warp::reply::json(&api_types::GenericResponse::from(merge_readiness))
                            .into_response(),
--- a/consensus/types/src/execution_payload_header.rs
+++ b/consensus/types/src/execution_payload_header.rs
@ -28,7 +28,10 @@ use BeaconStateError;
        serde(bound = "T: EthSpec", deny_unknown_fields),
        arbitrary(bound = "T: EthSpec")
    ),
-    ref_attributes(derive(PartialEq, TreeHash), tree_hash(enum_behaviour = "transparent")),
+    ref_attributes(
+        derive(PartialEq, TreeHash, Debug),
+        tree_hash(enum_behaviour = "transparent")
+    ),
    cast_error(ty = "Error", expr = "BeaconStateError::IncorrectStateVariant"),
    partial_getter_error(ty = "Error", expr = "BeaconStateError::IncorrectStateVariant")
 )]