Update node health endpoint (#4310)

## Issue Addressed

[#4292](https://github.com/sigp/lighthouse/issues/4292)

## Proposed Changes

Updated the node health endpoint

will return a 200 status code if `!syncing && !el_offline && !optimistic`

will return a 206 if `(syncing || optimistic) && !el_offline`

will return a 503 if `el_offline`



## Additional Info
This commit is contained in:
Eitan Seri-Levi 2023-06-30 01:13:04 +00:00
parent edd093293a
commit 826e090f50
3 changed files with 125 additions and 22 deletions

View File

@ -2418,24 +2418,41 @@ pub fn serve<T: BeaconChainTypes>(
.and(warp::path("health")) .and(warp::path("health"))
.and(warp::path::end()) .and(warp::path::end())
.and(network_globals.clone()) .and(network_globals.clone())
.and_then(|network_globals: Arc<NetworkGlobals<T::EthSpec>>| { .and(chain_filter.clone())
blocking_response_task(move || match *network_globals.sync_state.read() { .and_then(
SyncState::SyncingFinalized { .. } |network_globals: Arc<NetworkGlobals<T::EthSpec>>, chain: Arc<BeaconChain<T>>| {
| SyncState::SyncingHead { .. } async move {
| SyncState::SyncTransition let el_offline = if let Some(el) = &chain.execution_layer {
| SyncState::BackFillSyncing { .. } => Ok(warp::reply::with_status( el.is_offline_or_erroring().await
warp::reply(), } else {
warp::http::StatusCode::PARTIAL_CONTENT, true
)), };
SyncState::Synced => Ok(warp::reply::with_status(
warp::reply(), blocking_response_task(move || {
warp::http::StatusCode::OK, let is_optimistic = chain
)), .is_optimistic_or_invalid_head()
SyncState::Stalled => Err(warp_utils::reject::not_synced( .map_err(warp_utils::reject::beacon_chain_error)?;
"sync stalled, beacon chain may not yet be initialized.".to_string(),
)), let is_syncing = !network_globals.sync_state.read().is_synced();
})
}); if el_offline {
Err(warp_utils::reject::not_synced("execution layer is offline".to_string()))
} else if is_syncing || is_optimistic {
Ok(warp::reply::with_status(
warp::reply(),
warp::http::StatusCode::PARTIAL_CONTENT,
))
} else {
Ok(warp::reply::with_status(
warp::reply(),
warp::http::StatusCode::OK,
))
}
})
.await
}
},
);
// GET node/peers/{peer_id} // GET node/peers/{peer_id}
let get_node_peers_by_id = eth_v1 let get_node_peers_by_id = eth_v1

View File

@ -3,6 +3,7 @@ use beacon_chain::{
test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy}, test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},
BlockError, BlockError,
}; };
use eth2::StatusCode;
use execution_layer::{PayloadStatusV1, PayloadStatusV1Status}; use execution_layer::{PayloadStatusV1, PayloadStatusV1Status};
use http_api::test_utils::InteractiveTester; use http_api::test_utils::InteractiveTester;
use types::{EthSpec, ExecPayload, ForkName, MinimalEthSpec, Slot}; use types::{EthSpec, ExecPayload, ForkName, MinimalEthSpec, Slot};
@ -143,3 +144,82 @@ async fn el_error_on_new_payload() {
assert_eq!(api_response.is_optimistic, Some(false)); assert_eq!(api_response.is_optimistic, Some(false));
assert_eq!(api_response.is_syncing, false); assert_eq!(api_response.is_syncing, false);
} }
/// Verify that the `node health` endpoint answers with a 503 when the EL is offline.
///
/// The mock EL is told to fail its syncing query and an upcheck is run before the
/// endpoint is requested through the HTTP client.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn node_health_el_offline() {
    let block_count = E::slots_per_epoch() / 2;
    let validator_count = E::slots_per_epoch();
    let tester = post_merge_tester(block_count, validator_count).await;
    let mock_el = tester.harness.mock_execution_layer.as_ref().unwrap();

    // Make the mock server fail the syncing query, then run an upcheck
    // (presumably so the EL handle observes the failure -- TODO confirm).
    mock_el.server.set_syncing_response(Err("offline".into()));
    mock_el.el.upcheck().await;

    // A successful response here is a test failure: the request must be rejected.
    let error = tester
        .client
        .get_node_health()
        .await
        .err()
        .unwrap_or_else(|| panic!("should return 503 error status code"));

    assert_eq!(error.status().unwrap(), 503);
}
/// Verify that the `node health` endpoint answers with a 200 when the EL is online and synced.
///
/// The mock EL is told to report "not syncing" and an upcheck is run before the
/// endpoint is requested through the HTTP client.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn node_health_el_online_and_synced() {
    let block_count = E::slots_per_epoch() / 2;
    let validator_count = E::slots_per_epoch();
    let tester = post_merge_tester(block_count, validator_count).await;
    let mock_el = tester.harness.mock_execution_layer.as_ref().unwrap();

    // Have the mock server answer the syncing query with `false`, then run an upcheck
    // (presumably so the EL handle picks up the new state -- TODO confirm).
    mock_el.server.set_syncing_response(Ok(false));
    mock_el.el.upcheck().await;

    // Any error from the client is a test failure: the request must succeed.
    let response = tester
        .client
        .get_node_health()
        .await
        .unwrap_or_else(|_| panic!("should return 200 status code"));

    assert_eq!(response, StatusCode::OK);
}
/// Verify that the `node health` endpoint answers with a 206 when the EL is online but not synced.
///
/// The chain is extended by one block while the mock EL is switched to its
/// "all payloads syncing" mode, after which the endpoint is requested.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn node_health_el_online_and_not_synced() {
    let block_count = E::slots_per_epoch() / 2;
    let validator_count = E::slots_per_epoch();
    let tester = post_merge_tester(block_count, validator_count).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // Move to the next slot and switch the mock server into syncing mode before
    // producing one more block on the canonical head (presumably leaving the new
    // head optimistically imported -- TODO confirm).
    harness.advance_slot();
    mock_el.server.all_payloads_syncing(true);
    harness
        .extend_chain(
            1,
            BlockStrategy::OnCanonicalHead,
            AttestationStrategy::AllValidators,
        )
        .await;

    // Any error from the client is a test failure: the request must succeed.
    let response = tester
        .client
        .get_node_health()
        .await
        .unwrap_or_else(|_| panic!("should return 206 status code"));

    assert_eq!(response, StatusCode::PARTIAL_CONTENT);
}

View File

@ -8,7 +8,7 @@ use eth2::{
mixin::{RequestAccept, ResponseForkName, ResponseOptional}, mixin::{RequestAccept, ResponseForkName, ResponseOptional},
reqwest::RequestBuilder, reqwest::RequestBuilder,
types::{BlockId as CoreBlockId, ForkChoiceNode, StateId as CoreStateId, *}, types::{BlockId as CoreBlockId, ForkChoiceNode, StateId as CoreStateId, *},
BeaconNodeHttpClient, Error, StatusCode, Timeouts, BeaconNodeHttpClient, Error, Timeouts,
}; };
use execution_layer::test_utils::TestingBuilder; use execution_layer::test_utils::TestingBuilder;
use execution_layer::test_utils::DEFAULT_BUILDER_THRESHOLD_WEI; use execution_layer::test_utils::DEFAULT_BUILDER_THRESHOLD_WEI;
@ -1762,9 +1762,15 @@ impl ApiTester {
} }
pub async fn test_get_node_health(self) -> Self { pub async fn test_get_node_health(self) -> Self {
let status = self.client.get_node_health().await.unwrap(); let status = self.client.get_node_health().await;
assert_eq!(status, StatusCode::OK); match status {
Ok(_) => {
panic!("should return 503 error status code");
}
Err(e) => {
assert_eq!(e.status().unwrap(), 503);
}
}
self self
} }