lighthouse/beacon_node/http_api/tests/status_tests.rs

//! Tests related to the beacon node's sync status
use beacon_chain::{
    test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},
    BlockError,
};
use eth2::StatusCode;
use execution_layer::{PayloadStatusV1, PayloadStatusV1Status};
use http_api::test_utils::InteractiveTester;
use types::{EthSpec, ExecPayload, ForkName, MinimalEthSpec, Slot};

type E = MinimalEthSpec;

/// Create a new test environment that is post-merge with `chain_depth` blocks.
async fn post_merge_tester(chain_depth: u64, validator_count: u64) -> InteractiveTester<E> {
    // Test using latest fork so that we simulate conditions as similar to mainnet as possible.
    let mut spec = ForkName::latest().make_genesis_spec(E::default_spec());
    spec.terminal_total_difficulty = 1.into();

    let tester = InteractiveTester::<E>::new(Some(spec), validator_count as usize).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();
    let execution_ctx = mock_el.server.ctx.clone();

    // Move to terminal block.
    mock_el.server.all_payloads_valid();
    execution_ctx
        .execution_block_generator
        .write()
        .move_to_terminal_block()
        .unwrap();

    // Create some chain depth.
    harness.advance_slot();
    harness
        .extend_chain_with_sync(
            chain_depth as usize,
            BlockStrategy::OnCanonicalHead,
            AttestationStrategy::AllValidators,
            SyncCommitteeStrategy::AllValidators,
        )
        .await;
    tester
}

/// Check `syncing` endpoint when the EL is syncing.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn el_syncing_then_synced() {
    let num_blocks = E::slots_per_epoch() / 2;
    let num_validators = E::slots_per_epoch();
    let tester = post_merge_tester(num_blocks, num_validators).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // EL syncing
    mock_el.server.set_syncing_response(Ok(true));
    mock_el.el.upcheck().await;

    let api_response = tester.client.get_node_syncing().await.unwrap().data;
    assert_eq!(api_response.el_offline, Some(false));
    assert_eq!(api_response.is_optimistic, Some(false));
    assert_eq!(api_response.is_syncing, false);

    // EL synced
    mock_el.server.set_syncing_response(Ok(false));
    mock_el.el.upcheck().await;

    let api_response = tester.client.get_node_syncing().await.unwrap().data;
    assert_eq!(api_response.el_offline, Some(false));
    assert_eq!(api_response.is_optimistic, Some(false));
    assert_eq!(api_response.is_syncing, false);
}

/// Check `syncing` endpoint when the EL is offline (errors on upcheck).
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn el_offline() {
    let num_blocks = E::slots_per_epoch() / 2;
    let num_validators = E::slots_per_epoch();
    let tester = post_merge_tester(num_blocks, num_validators).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // EL offline
    mock_el.server.set_syncing_response(Err("offline".into()));
    mock_el.el.upcheck().await;

    let api_response = tester.client.get_node_syncing().await.unwrap().data;
    assert_eq!(api_response.el_offline, Some(true));
    assert_eq!(api_response.is_optimistic, Some(false));
    assert_eq!(api_response.is_syncing, false);
}

/// Check `syncing` endpoint when the EL errors on newPaylod but is not fully offline.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn el_error_on_new_payload() {
    let num_blocks = E::slots_per_epoch() / 2;
    let num_validators = E::slots_per_epoch();
    let tester = post_merge_tester(num_blocks, num_validators).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // Make a block.
    let pre_state = harness.get_current_state();
    let (block, _) = harness
        .make_block(pre_state, Slot::new(num_blocks + 1))
        .await;
    let block_hash = block
        .message()
        .body()
        .execution_payload()
        .unwrap()
        .block_hash();

    // Make sure `newPayload` errors for the new block.
    mock_el
        .server
        .set_new_payload_error(block_hash, "error".into());

    // Attempt to process the block, which should error.
    harness.advance_slot();
    assert!(matches!(
        harness.process_block_result(block.clone()).await,
        Err(BlockError::ExecutionPayloadError(_))
    ));

    // The EL should now be *offline* according to the API.
    let api_response = tester.client.get_node_syncing().await.unwrap().data;
    assert_eq!(api_response.el_offline, Some(true));
    assert_eq!(api_response.is_optimistic, Some(false));
    assert_eq!(api_response.is_syncing, false);

    // Processing a block successfully should remove the status.
    mock_el.server.set_new_payload_status(
        block_hash,
        PayloadStatusV1 {
            status: PayloadStatusV1Status::Valid,
            latest_valid_hash: Some(block_hash),
            validation_error: None,
        },
    );
    harness.process_block_result(block).await.unwrap();

    let api_response = tester.client.get_node_syncing().await.unwrap().data;
    assert_eq!(api_response.el_offline, Some(false));
    assert_eq!(api_response.is_optimistic, Some(false));
    assert_eq!(api_response.is_syncing, false);
}

/// Check `node health` endpoint when the EL is offline.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn node_health_el_offline() {
    let num_blocks = E::slots_per_epoch() / 2;
    let num_validators = E::slots_per_epoch();
    let tester = post_merge_tester(num_blocks, num_validators).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // EL offline
    mock_el.server.set_syncing_response(Err("offline".into()));
    mock_el.el.upcheck().await;

    let status = tester.client.get_node_health().await;
    match status {
        Ok(_) => {
            panic!("should return 503 error status code");
        }
        Err(e) => {
            assert_eq!(e.status().unwrap(), 503);
        }
    }
}

/// Check `node health` endpoint when the EL is online and synced.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn node_health_el_online_and_synced() {
    let num_blocks = E::slots_per_epoch() / 2;
    let num_validators = E::slots_per_epoch();
    let tester = post_merge_tester(num_blocks, num_validators).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // EL synced
    mock_el.server.set_syncing_response(Ok(false));
    mock_el.el.upcheck().await;

    let status = tester.client.get_node_health().await;
    match status {
        Ok(response) => {
            assert_eq!(response, StatusCode::OK);
        }
        Err(_) => {
            panic!("should return 200 status code");
        }
    }
}

/// Check `node health` endpoint when the EL is online but not synced.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn node_health_el_online_and_not_synced() {
    let num_blocks = E::slots_per_epoch() / 2;
    let num_validators = E::slots_per_epoch();
    let tester = post_merge_tester(num_blocks, num_validators).await;
    let harness = &tester.harness;
    let mock_el = harness.mock_execution_layer.as_ref().unwrap();

    // EL not synced
    harness.advance_slot();
    mock_el.server.all_payloads_syncing(true);
    harness
        .extend_chain(
            1,
            BlockStrategy::OnCanonicalHead,
            AttestationStrategy::AllValidators,
        )
        .await;

    let status = tester.client.get_node_health().await;
    match status {
        Ok(response) => {
            assert_eq!(response, StatusCode::PARTIAL_CONTENT);
        }
        Err(_) => {
            panic!("should return 206 status code");
        }
    }
}
Implement `el_offline` and use it in the VC (#4295) ## Issue Addressed Closes https://github.com/sigp/lighthouse/issues/4291, part of #3613. ## Proposed Changes - Implement the `el_offline` field on `/eth/v1/node/syncing`. We set `el_offline=true` if: - The EL's internal status is `Offline` or `AuthFailed`, _or_ - The most recent call to `newPayload` resulted in an error (more on this in a moment). - Use the `el_offline` field in the VC to mark nodes with offline ELs as _unsynced_. These nodes will still be used, but only after synced nodes. - Overhaul the usage of `RequireSynced` so that `::No` is used almost everywhere. The `--allow-unsynced` flag was broken and had the opposite effect to intended, so it has been deprecated. - Add tests for the EL being offline on the upcheck call, and being offline due to the newPayload check. ## Why track `newPayload` errors? Tracking the EL's online/offline status is too coarse-grained to be useful in practice, because: - If the EL is timing out to some calls, it's unlikely to timeout on the `upcheck` call, which is _just_ `eth_syncing`. Every failed call is followed by an upcheck [here](https://github.com/sigp/lighthouse/blob/693886b94176faa4cb450f024696cb69cda2fe58/beacon_node/execution_layer/src/engines.rs#L372-L380), which would have the effect of masking the failure and keeping the status _online_. - The `newPayload` call is the most likely to time out. It's the call in which ELs tend to do most of their work (often 1-2 seconds), with `forkchoiceUpdated` usually returning much faster (<50ms). - If `newPayload` is failing consistently (e.g. timing out) then this is a good indication that either the node's EL is in trouble, or the network as a whole is. In the first case validator clients _should_ prefer other BNs if they have one available. In the second case, all of their BNs will likely report `el_offline` and they'll just have to proceed with trying to use them. ## Additional Changes - Add utility method `ForkName::latest` which is quite convenient for test writing, but probably other things too. - Delete some stale comments from when we used to support multiple execution nodes. 2023-05-17 05:51:56 +00:00			`//! Tests related to the beacon node's sync status`
			`use beacon_chain::{`
			`test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},`
			`BlockError,`
			`};`
Update node health endpoint (#4310) ## Issue Addressed [#4292](https://github.com/sigp/lighthouse/issues/4292) ## Proposed Changes Updated the node health endpoint will return a 200 status code if `!syncing && !el_offline && !optimistic` wil return a 206 if `(syncing \|\| optimistic) && !el_offline` will return a 503 if `el_offline` ## Additional Info 2023-06-30 01:13:04 +00:00			`use eth2::StatusCode;`
Implement `el_offline` and use it in the VC (#4295) ## Issue Addressed Closes https://github.com/sigp/lighthouse/issues/4291, part of #3613. ## Proposed Changes - Implement the `el_offline` field on `/eth/v1/node/syncing`. We set `el_offline=true` if: - The EL's internal status is `Offline` or `AuthFailed`, _or_ - The most recent call to `newPayload` resulted in an error (more on this in a moment). - Use the `el_offline` field in the VC to mark nodes with offline ELs as _unsynced_. These nodes will still be used, but only after synced nodes. - Overhaul the usage of `RequireSynced` so that `::No` is used almost everywhere. The `--allow-unsynced` flag was broken and had the opposite effect to intended, so it has been deprecated. - Add tests for the EL being offline on the upcheck call, and being offline due to the newPayload check. ## Why track `newPayload` errors? Tracking the EL's online/offline status is too coarse-grained to be useful in practice, because: - If the EL is timing out to some calls, it's unlikely to timeout on the `upcheck` call, which is _just_ `eth_syncing`. Every failed call is followed by an upcheck [here](https://github.com/sigp/lighthouse/blob/693886b94176faa4cb450f024696cb69cda2fe58/beacon_node/execution_layer/src/engines.rs#L372-L380), which would have the effect of masking the failure and keeping the status _online_. - The `newPayload` call is the most likely to time out. It's the call in which ELs tend to do most of their work (often 1-2 seconds), with `forkchoiceUpdated` usually returning much faster (<50ms). - If `newPayload` is failing consistently (e.g. timing out) then this is a good indication that either the node's EL is in trouble, or the network as a whole is. In the first case validator clients _should_ prefer other BNs if they have one available. In the second case, all of their BNs will likely report `el_offline` and they'll just have to proceed with trying to use them. ## Additional Changes - Add utility method `ForkName::latest` which is quite convenient for test writing, but probably other things too. - Delete some stale comments from when we used to support multiple execution nodes. 2023-05-17 05:51:56 +00:00			`use execution_layer::{PayloadStatusV1, PayloadStatusV1Status};`
			`use http_api::test_utils::InteractiveTester;`
			`use types::{EthSpec, ExecPayload, ForkName, MinimalEthSpec, Slot};`

			`type E = MinimalEthSpec;`

			/// Create a new test environment that is post-merge with `chain_depth` blocks.
			`async fn post_merge_tester(chain_depth: u64, validator_count: u64) -> InteractiveTester<E> {`
			`// Test using latest fork so that we simulate conditions as similar to mainnet as possible.`
			`let mut spec = ForkName::latest().make_genesis_spec(E::default_spec());`
			`spec.terminal_total_difficulty = 1.into();`

			`let tester = InteractiveTester::<E>::new(Some(spec), validator_count as usize).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`
			`let execution_ctx = mock_el.server.ctx.clone();`

			`// Move to terminal block.`
			`mock_el.server.all_payloads_valid();`
			`execution_ctx`
			`.execution_block_generator`
			`.write()`
			`.move_to_terminal_block()`
			`.unwrap();`

			`// Create some chain depth.`
			`harness.advance_slot();`
			`harness`
			`.extend_chain_with_sync(`
			`chain_depth as usize,`
			`BlockStrategy::OnCanonicalHead,`
			`AttestationStrategy::AllValidators,`
			`SyncCommitteeStrategy::AllValidators,`
			`)`
			`.await;`
			`tester`
			`}`

			/// Check `syncing` endpoint when the EL is syncing.
			`#[tokio::test(flavor = "multi_thread", worker_threads = 2)]`
			`async fn el_syncing_then_synced() {`
			`let num_blocks = E::slots_per_epoch() / 2;`
			`let num_validators = E::slots_per_epoch();`
			`let tester = post_merge_tester(num_blocks, num_validators).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`

			`// EL syncing`
			`mock_el.server.set_syncing_response(Ok(true));`
			`mock_el.el.upcheck().await;`

			`let api_response = tester.client.get_node_syncing().await.unwrap().data;`
			`assert_eq!(api_response.el_offline, Some(false));`
			`assert_eq!(api_response.is_optimistic, Some(false));`
			`assert_eq!(api_response.is_syncing, false);`

			`// EL synced`
			`mock_el.server.set_syncing_response(Ok(false));`
			`mock_el.el.upcheck().await;`

			`let api_response = tester.client.get_node_syncing().await.unwrap().data;`
			`assert_eq!(api_response.el_offline, Some(false));`
			`assert_eq!(api_response.is_optimistic, Some(false));`
			`assert_eq!(api_response.is_syncing, false);`
			`}`

			/// Check `syncing` endpoint when the EL is offline (errors on upcheck).
			`#[tokio::test(flavor = "multi_thread", worker_threads = 2)]`
			`async fn el_offline() {`
			`let num_blocks = E::slots_per_epoch() / 2;`
			`let num_validators = E::slots_per_epoch();`
			`let tester = post_merge_tester(num_blocks, num_validators).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`

			`// EL offline`
			`mock_el.server.set_syncing_response(Err("offline".into()));`
			`mock_el.el.upcheck().await;`

			`let api_response = tester.client.get_node_syncing().await.unwrap().data;`
			`assert_eq!(api_response.el_offline, Some(true));`
			`assert_eq!(api_response.is_optimistic, Some(false));`
			`assert_eq!(api_response.is_syncing, false);`
			`}`

			/// Check `syncing` endpoint when the EL errors on newPaylod but is not fully offline.
			`#[tokio::test(flavor = "multi_thread", worker_threads = 2)]`
			`async fn el_error_on_new_payload() {`
			`let num_blocks = E::slots_per_epoch() / 2;`
			`let num_validators = E::slots_per_epoch();`
			`let tester = post_merge_tester(num_blocks, num_validators).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`

			`// Make a block.`
			`let pre_state = harness.get_current_state();`
			`let (block, _) = harness`
			`.make_block(pre_state, Slot::new(num_blocks + 1))`
			`.await;`
			`let block_hash = block`
			`.message()`
			`.body()`
			`.execution_payload()`
			`.unwrap()`
			`.block_hash();`

			// Make sure `newPayload` errors for the new block.
			`mock_el`
			`.server`
			`.set_new_payload_error(block_hash, "error".into());`

			`// Attempt to process the block, which should error.`
			`harness.advance_slot();`
			`assert!(matches!(`
			`harness.process_block_result(block.clone()).await,`
			`Err(BlockError::ExecutionPayloadError(_))`
			`));`

			`// The EL should now be offline according to the API.`
			`let api_response = tester.client.get_node_syncing().await.unwrap().data;`
			`assert_eq!(api_response.el_offline, Some(true));`
			`assert_eq!(api_response.is_optimistic, Some(false));`
			`assert_eq!(api_response.is_syncing, false);`

			`// Processing a block successfully should remove the status.`
			`mock_el.server.set_new_payload_status(`
			`block_hash,`
			`PayloadStatusV1 {`
			`status: PayloadStatusV1Status::Valid,`
			`latest_valid_hash: Some(block_hash),`
			`validation_error: None,`
			`},`
			`);`
			`harness.process_block_result(block).await.unwrap();`

			`let api_response = tester.client.get_node_syncing().await.unwrap().data;`
			`assert_eq!(api_response.el_offline, Some(false));`
			`assert_eq!(api_response.is_optimistic, Some(false));`
			`assert_eq!(api_response.is_syncing, false);`
			`}`
Update node health endpoint (#4310) ## Issue Addressed [#4292](https://github.com/sigp/lighthouse/issues/4292) ## Proposed Changes Updated the node health endpoint will return a 200 status code if `!syncing && !el_offline && !optimistic` wil return a 206 if `(syncing \|\| optimistic) && !el_offline` will return a 503 if `el_offline` ## Additional Info 2023-06-30 01:13:04 +00:00
			/// Check `node health` endpoint when the EL is offline.
			`#[tokio::test(flavor = "multi_thread", worker_threads = 2)]`
			`async fn node_health_el_offline() {`
			`let num_blocks = E::slots_per_epoch() / 2;`
			`let num_validators = E::slots_per_epoch();`
			`let tester = post_merge_tester(num_blocks, num_validators).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`

			`// EL offline`
			`mock_el.server.set_syncing_response(Err("offline".into()));`
			`mock_el.el.upcheck().await;`

			`let status = tester.client.get_node_health().await;`
			`match status {`
			`Ok(_) => {`
			`panic!("should return 503 error status code");`
			`}`
			`Err(e) => {`
			`assert_eq!(e.status().unwrap(), 503);`
			`}`
			`}`
			`}`

			/// Check `node health` endpoint when the EL is online and synced.
			`#[tokio::test(flavor = "multi_thread", worker_threads = 2)]`
			`async fn node_health_el_online_and_synced() {`
			`let num_blocks = E::slots_per_epoch() / 2;`
			`let num_validators = E::slots_per_epoch();`
			`let tester = post_merge_tester(num_blocks, num_validators).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`

			`// EL synced`
			`mock_el.server.set_syncing_response(Ok(false));`
			`mock_el.el.upcheck().await;`

			`let status = tester.client.get_node_health().await;`
			`match status {`
			`Ok(response) => {`
			`assert_eq!(response, StatusCode::OK);`
			`}`
			`Err(_) => {`
			`panic!("should return 200 status code");`
			`}`
			`}`
			`}`

			/// Check `node health` endpoint when the EL is online but not synced.
			`#[tokio::test(flavor = "multi_thread", worker_threads = 2)]`
			`async fn node_health_el_online_and_not_synced() {`
			`let num_blocks = E::slots_per_epoch() / 2;`
			`let num_validators = E::slots_per_epoch();`
			`let tester = post_merge_tester(num_blocks, num_validators).await;`
			`let harness = &tester.harness;`
			`let mock_el = harness.mock_execution_layer.as_ref().unwrap();`

			`// EL not synced`
			`harness.advance_slot();`
			`mock_el.server.all_payloads_syncing(true);`
			`harness`
			`.extend_chain(`
			`1,`
			`BlockStrategy::OnCanonicalHead,`
			`AttestationStrategy::AllValidators,`
			`)`
			`.await;`

			`let status = tester.client.get_node_health().await;`
			`match status {`
			`Ok(response) => {`
			`assert_eq!(response, StatusCode::PARTIAL_CONTENT);`
			`}`
			`Err(_) => {`
			`panic!("should return 206 status code");`
			`}`
			`}`
			`}`