Update node health endpoint (#4310)

## Issue Addressed

[#4292](https://github.com/sigp/lighthouse/issues/4292)

## Proposed Changes

Updated the node health endpoint so that it:

- returns a 200 status code if `!syncing && !el_offline && !optimistic`
- returns a 206 status code if `(syncing || optimistic) && !el_offline`
- returns a 503 status code if `el_offline`

## Additional Info
2023-06-30 01:13:04 +00:00 · 2023-06-30 01:13:04 +00:00 · 826e090f50
commit 826e090f50
parent edd093293a
3 changed files with 125 additions and 22 deletions
--- a/beacon_node/http_api/src/lib.rs
+++ b/beacon_node/http_api/src/lib.rs
@ -2418,24 +2418,41 @@ pub fn serve<T: BeaconChainTypes>(
        .and(warp::path("health"))
        .and(warp::path::end())
        .and(network_globals.clone())
-        .and_then(|network_globals: Arc<NetworkGlobals<T::EthSpec>>| {
+        .and(chain_filter.clone())
-            blocking_response_task(move || match *network_globals.sync_state.read() {
+        .and_then(
-                SyncState::SyncingFinalized { .. }
+            |network_globals: Arc<NetworkGlobals<T::EthSpec>>, chain: Arc<BeaconChain<T>>| {
-                | SyncState::SyncingHead { .. }
+                async move {
-                | SyncState::SyncTransition
+                    let el_offline = if let Some(el) = &chain.execution_layer {
-                | SyncState::BackFillSyncing { .. } => Ok(warp::reply::with_status(
+                        el.is_offline_or_erroring().await
-                    warp::reply(),
+                    } else {
-                    warp::http::StatusCode::PARTIAL_CONTENT,
+                        true
-                )),
+                    };
-                SyncState::Synced => Ok(warp::reply::with_status(
+
-                    warp::reply(),
+                    blocking_response_task(move || {
-                    warp::http::StatusCode::OK,
+                        let is_optimistic = chain
-                )),
+                            .is_optimistic_or_invalid_head()
-                SyncState::Stalled => Err(warp_utils::reject::not_synced(
+                            .map_err(warp_utils::reject::beacon_chain_error)?;
-                    "sync stalled, beacon chain may not yet be initialized.".to_string(),
+
-                )),
+                        let is_syncing = !network_globals.sync_state.read().is_synced();
-            })
+
-        });
+                        if el_offline {
                            Err(warp_utils::reject::not_synced("execution layer is offline".to_string()))
                        } else if is_syncing || is_optimistic {
                            Ok(warp::reply::with_status(
                                warp::reply(),
                                warp::http::StatusCode::PARTIAL_CONTENT,
                            ))
                        } else {
                            Ok(warp::reply::with_status(
                                warp::reply(),
                                warp::http::StatusCode::OK,
                            ))
                        }
                    })
                    .await
                }
            },
        );
    // GET node/peers/{peer_id}
    let get_node_peers_by_id = eth_v1
--- a/beacon_node/http_api/tests/status_tests.rs
+++ b/beacon_node/http_api/tests/status_tests.rs
@ -3,6 +3,7 @@ use beacon_chain::{
    test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},
    BlockError,
 };
 use eth2::StatusCode;
 use execution_layer::{PayloadStatusV1, PayloadStatusV1Status};
 use http_api::test_utils::InteractiveTester;
 use types::{EthSpec, ExecPayload, ForkName, MinimalEthSpec, Slot};
@ -143,3 +144,82 @@ async fn el_error_on_new_payload() {
    assert_eq!(api_response.is_optimistic, Some(false));
    assert_eq!(api_response.is_syncing, false);
 }
 /// Verify that the `node health` endpoint answers with a 503 while the EL is offline.
 ///
 /// The mock EL's syncing response is set to an error and `upcheck` is run before the
 /// health endpoint is queried through the HTTP client.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn node_health_el_offline() {
    let validator_count = E::slots_per_epoch();
    let block_count = E::slots_per_epoch() / 2;
    let tester = post_merge_tester(block_count, validator_count).await;
    let execution_mock = tester.harness.mock_execution_layer.as_ref().unwrap();

    // Make the mock EL report an error for its syncing status, then run the upcheck.
    execution_mock
        .server
        .set_syncing_response(Err("offline".into()));
    execution_mock.el.upcheck().await;

    let outcome = tester.client.get_node_health().await;
    if let Err(e) = outcome {
        assert_eq!(e.status().unwrap(), 503);
    } else {
        panic!("should return 503 error status code");
    }
 }
 /// Verify that the `node health` endpoint answers with a 200 when the EL is online and synced.
 ///
 /// The mock EL's syncing response is set to `Ok(false)` and `upcheck` is run before the
 /// health endpoint is queried through the HTTP client.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn node_health_el_online_and_synced() {
    let validator_count = E::slots_per_epoch();
    let block_count = E::slots_per_epoch() / 2;
    let tester = post_merge_tester(block_count, validator_count).await;
    let execution_mock = tester.harness.mock_execution_layer.as_ref().unwrap();

    // Make the mock EL report that it is not syncing, then run the upcheck.
    execution_mock.server.set_syncing_response(Ok(false));
    execution_mock.el.upcheck().await;

    let outcome = tester.client.get_node_health().await;
    if let Ok(response) = outcome {
        assert_eq!(response, StatusCode::OK);
    } else {
        panic!("should return 200 status code");
    }
 }
 /// Verify that the `node health` endpoint answers with a 206 when the EL is online but
 /// not synced.
 ///
 /// The slot is advanced, the mock EL server is switched to `all_payloads_syncing(true)`, and
 /// the chain is extended by one block before the health endpoint is queried.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn node_health_el_online_and_not_synced() {
    let validator_count = E::slots_per_epoch();
    let block_count = E::slots_per_epoch() / 2;
    let tester = post_merge_tester(block_count, validator_count).await;
    let harness = &tester.harness;
    let execution_mock = harness.mock_execution_layer.as_ref().unwrap();

    // Put the mock EL into its "syncing" mode for payloads before importing a new block.
    // NOTE(review): presumably this makes the new head optimistic -- confirm against the mock EL.
    harness.advance_slot();
    execution_mock.server.all_payloads_syncing(true);
    let head_strategy = BlockStrategy::OnCanonicalHead;
    let attestation_strategy = AttestationStrategy::AllValidators;
    harness
        .extend_chain(1, head_strategy, attestation_strategy)
        .await;

    let outcome = tester.client.get_node_health().await;
    if let Ok(response) = outcome {
        assert_eq!(response, StatusCode::PARTIAL_CONTENT);
    } else {
        panic!("should return 206 status code");
    }
 }
--- a/beacon_node/http_api/tests/tests.rs
+++ b/beacon_node/http_api/tests/tests.rs
@ -8,7 +8,7 @@ use eth2::{
    mixin::{RequestAccept, ResponseForkName, ResponseOptional},
    reqwest::RequestBuilder,
    types::{BlockId as CoreBlockId, ForkChoiceNode, StateId as CoreStateId, *},
-    BeaconNodeHttpClient, Error, StatusCode, Timeouts,
+    BeaconNodeHttpClient, Error, Timeouts,
 };
 use execution_layer::test_utils::TestingBuilder;
 use execution_layer::test_utils::DEFAULT_BUILDER_THRESHOLD_WEI;
@ -1762,9 +1762,15 @@ impl ApiTester {
    }
    pub async fn test_get_node_health(self) -> Self {
-        let status = self.client.get_node_health().await.unwrap();
+        let status = self.client.get_node_health().await;
-        assert_eq!(status, StatusCode::OK);
+        match status {
-
+            Ok(_) => {
                panic!("should return 503 error status code");
            }
            Err(e) => {
                assert_eq!(e.status().unwrap(), 503);
            }
        }
        self
    }