Simplify error handling after engines fallback removal (#3283)

## Issue Addressed
Part of #3118, continuation of #3257

## Proposed Changes
- the [ `first_success_without_retry` ](9c429d0764/beacon_node/execution_layer/src/engines.rs (L348-L351)) function returns a single error.
- the [`first_success`](9c429d0764/beacon_node/execution_layer/src/engines.rs (L324)) function returns a single error.
- [ `EngineErrors` ](9c429d0764/beacon_node/execution_layer/src/lib.rs (L69)) carries a single error.
- [`EngineError`](9c429d0764/beacon_node/execution_layer/src/engines.rs (L173-L177)) now does not need to carry an Id
- [`process_multiple_payload_statuses`](9c429d0764/beacon_node/execution_layer/src/payload_status.rs (L46-L50)) now doesn't need to receive an iterator of statuses and weight in different errors

## Additional Info
This is built on top of #3294
This commit is contained in:
Divma 2022-07-04 02:56:13 +00:00
parent 61ed5f0ec6
commit 1219da9a45
3 changed files with 154 additions and 248 deletions

View File

@ -57,7 +57,6 @@ struct PayloadIdCacheKey {
/// An execution engine. /// An execution engine.
pub struct Engine<T> { pub struct Engine<T> {
pub id: String,
pub api: HttpJsonRpc<T>, pub api: HttpJsonRpc<T>,
payload_id_cache: Mutex<LruCache<PayloadIdCacheKey, PayloadId>>, payload_id_cache: Mutex<LruCache<PayloadIdCacheKey, PayloadId>>,
state: RwLock<EngineState>, state: RwLock<EngineState>,
@ -65,9 +64,8 @@ pub struct Engine<T> {
impl<T> Engine<T> { impl<T> Engine<T> {
/// Creates a new, offline engine. /// Creates a new, offline engine.
pub fn new(id: String, api: HttpJsonRpc<T>) -> Self { pub fn new(api: HttpJsonRpc<T>) -> Self {
Self { Self {
id,
api, api,
payload_id_cache: Mutex::new(LruCache::new(PAYLOAD_ID_LRU_CACHE_SIZE)), payload_id_cache: Mutex::new(LruCache::new(PAYLOAD_ID_LRU_CACHE_SIZE)),
state: RwLock::new(EngineState::Offline), state: RwLock::new(EngineState::Offline),
@ -135,10 +133,10 @@ pub struct Engines {
#[derive(Debug)] #[derive(Debug)]
pub enum EngineError { pub enum EngineError {
Offline { id: String }, Offline,
Api { id: String, error: EngineApiError }, Api { error: EngineApiError },
BuilderApi { error: EngineApiError }, BuilderApi { error: EngineApiError },
Auth { id: String }, Auth,
} }
impl Engines { impl Engines {
@ -159,7 +157,6 @@ impl Engines {
self.log, self.log,
"No need to call forkchoiceUpdated"; "No need to call forkchoiceUpdated";
"msg" => "head does not have execution enabled", "msg" => "head does not have execution enabled",
"id" => &self.engine.id,
); );
return; return;
} }
@ -168,7 +165,6 @@ impl Engines {
self.log, self.log,
"Issuing forkchoiceUpdated"; "Issuing forkchoiceUpdated";
"forkchoice_state" => ?forkchoice_state, "forkchoice_state" => ?forkchoice_state,
"id" => &self.engine.id,
); );
// For simplicity, payload attributes are never included in this call. It may be // For simplicity, payload attributes are never included in this call. It may be
@ -183,14 +179,12 @@ impl Engines {
self.log, self.log,
"Failed to issue latest head to engine"; "Failed to issue latest head to engine";
"error" => ?e, "error" => ?e,
"id" => &self.engine.id,
); );
} }
} else { } else {
debug!( debug!(
self.log, self.log,
"No head, not sending to engine"; "No head, not sending to engine";
"id" => &self.engine.id,
); );
} }
} }
@ -261,45 +255,36 @@ impl Engines {
} }
} }
/// Run `func` on all engines, in the order in which they are defined, returning the first /// Run `func` on the node.
/// successful result that is found.
/// ///
/// This function might try to run `func` twice. If all nodes return an error on the first time /// This function might try to run `func` twice. If the node returns an error it will try to
/// it runs, it will try to upcheck all offline nodes and then run the function again. /// upcheck it and then run the function again.
pub async fn first_success<'a, F, G, H>(&'a self, func: F) -> Result<H, Vec<EngineError>> pub async fn first_success<'a, F, G, H>(&'a self, func: F) -> Result<H, EngineError>
where where
F: Fn(&'a Engine<EngineApi>) -> G + Copy, F: Fn(&'a Engine<EngineApi>) -> G + Copy,
G: Future<Output = Result<H, EngineApiError>>, G: Future<Output = Result<H, EngineApiError>>,
{ {
match self.first_success_without_retry(func).await { match self.first_success_without_retry(func).await {
Ok(result) => Ok(result), Ok(result) => Ok(result),
Err(mut first_errors) => { Err(e) => {
// Try to recover some nodes. debug!(self.log, "First engine call failed. Retrying"; "err" => ?e);
// Try to recover the node.
self.upcheck_not_synced(Logging::Enabled).await; self.upcheck_not_synced(Logging::Enabled).await;
// Retry the call on all nodes. // Try again.
match self.first_success_without_retry(func).await { self.first_success_without_retry(func).await
Ok(result) => Ok(result),
Err(second_errors) => {
first_errors.extend(second_errors);
Err(first_errors)
}
}
} }
} }
} }
/// Run `func` on all engines, in the order in which they are defined, returning the first /// Run `func` on the node.
/// successful result that is found.
pub async fn first_success_without_retry<'a, F, G, H>( pub async fn first_success_without_retry<'a, F, G, H>(
&'a self, &'a self,
func: F, func: F,
) -> Result<H, Vec<EngineError>> ) -> Result<H, EngineError>
where where
F: Fn(&'a Engine<EngineApi>) -> G, F: Fn(&'a Engine<EngineApi>) -> G,
G: Future<Output = Result<H, EngineApiError>>, G: Future<Output = Result<H, EngineApiError>>,
{ {
let mut errors = vec![];
let (engine_synced, engine_auth_failed) = { let (engine_synced, engine_auth_failed) = {
let state = self.engine.state.read().await; let state = self.engine.state.read().await;
( (
@ -309,32 +294,22 @@ impl Engines {
}; };
if engine_synced { if engine_synced {
match func(&self.engine).await { match func(&self.engine).await {
Ok(result) => return Ok(result), Ok(result) => Ok(result),
Err(error) => { Err(error) => {
debug!( debug!(
self.log, self.log,
"Execution engine call failed"; "Execution engine call failed";
"error" => ?error, "error" => ?error,
"id" => &&self.engine.id
); );
*self.engine.state.write().await = EngineState::Offline; *self.engine.state.write().await = EngineState::Offline;
errors.push(EngineError::Api { Err(EngineError::Api { error })
id: self.engine.id.clone(),
error,
})
} }
} }
} else if engine_auth_failed { } else if engine_auth_failed {
errors.push(EngineError::Auth { Err(EngineError::Auth)
id: self.engine.id.clone(),
})
} else { } else {
errors.push(EngineError::Offline { Err(EngineError::Offline)
id: self.engine.id.clone(),
})
} }
Err(errors)
} }
/// Runs `func` on the node. /// Runs `func` on the node.
@ -363,9 +338,7 @@ impl Engines {
{ {
let func = &func; let func = &func;
if *self.engine.state.read().await == EngineState::Offline { if *self.engine.state.read().await == EngineState::Offline {
Err(EngineError::Offline { Err(EngineError::Offline)
id: self.engine.id.clone(),
})
} else { } else {
match func(&self.engine).await { match func(&self.engine).await {
Ok(res) => Ok(res), Ok(res) => Ok(res),
@ -376,10 +349,7 @@ impl Engines {
"error" => ?error, "error" => ?error,
); );
*self.engine.state.write().await = EngineState::Offline; *self.engine.state.write().await = EngineState::Offline;
Err(EngineError::Api { Err(EngineError::Api { error })
id: self.engine.id.clone(),
error,
})
} }
} }
} }

View File

@ -12,7 +12,7 @@ pub use engine_api::{http, http::deposit_methods, http::HttpJsonRpc};
pub use engines::ForkChoiceState; pub use engines::ForkChoiceState;
use engines::{Engine, EngineError, Engines, Logging}; use engines::{Engine, EngineError, Engines, Logging};
use lru::LruCache; use lru::LruCache;
use payload_status::process_multiple_payload_statuses; use payload_status::process_payload_status;
pub use payload_status::PayloadStatus; pub use payload_status::PayloadStatus;
use sensitive_url::SensitiveUrl; use sensitive_url::SensitiveUrl;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -68,11 +68,10 @@ pub enum Error {
NoPayloadBuilder, NoPayloadBuilder,
ApiError(ApiError), ApiError(ApiError),
Builder(builder_client::Error), Builder(builder_client::Error),
EngineErrors(Vec<EngineError>), EngineError(Box<EngineError>),
NotSynced, NotSynced,
ShuttingDown, ShuttingDown,
FeeRecipientUnspecified, FeeRecipientUnspecified,
ConsensusFailure,
MissingLatestValidHash, MissingLatestValidHash,
InvalidJWTSecret(String), InvalidJWTSecret(String),
} }
@ -200,12 +199,11 @@ impl<T: EthSpec> ExecutionLayer<T> {
}?; }?;
let engine: Engine<EngineApi> = { let engine: Engine<EngineApi> = {
let id = execution_url.to_string();
let auth = Auth::new(jwt_key, jwt_id, jwt_version); let auth = Auth::new(jwt_key, jwt_id, jwt_version);
debug!(log, "Loaded execution endpoint"; "endpoint" => %id, "jwt_path" => ?secret_file.as_path()); debug!(log, "Loaded execution endpoint"; "endpoint" => %execution_url, "jwt_path" => ?secret_file.as_path());
let api = HttpJsonRpc::<EngineApi>::new_with_auth(execution_url, auth) let api = HttpJsonRpc::<EngineApi>::new_with_auth(execution_url, auth)
.map_err(Error::ApiError)?; .map_err(Error::ApiError)?;
Engine::<EngineApi>::new(id, api) Engine::<EngineApi>::new(api)
}; };
let builder = builder_url let builder = builder_url
@ -709,7 +707,8 @@ impl<T: EthSpec> ExecutionLayer<T> {
}) })
}) })
.await .await
.map_err(Error::EngineErrors) .map_err(Box::new)
.map_err(Error::EngineError)
} }
/// Maps to the `engine_newPayload` JSON-RPC call. /// Maps to the `engine_newPayload` JSON-RPC call.
@ -742,16 +741,14 @@ impl<T: EthSpec> ExecutionLayer<T> {
"block_number" => execution_payload.block_number, "block_number" => execution_payload.block_number,
); );
let broadcast_results = self let broadcast_result = self
.engines() .engines()
.broadcast(|engine| engine.api.new_payload_v1(execution_payload.clone())) .broadcast(|engine| engine.api.new_payload_v1(execution_payload.clone()))
.await; .await;
process_multiple_payload_statuses( process_payload_status(execution_payload.block_hash, broadcast_result, self.log())
execution_payload.block_hash, .map_err(Box::new)
Some(broadcast_results).into_iter(), .map_err(Error::EngineError)
self.log(),
)
} }
/// Register that the given `validator_index` is going to produce a block at `slot`. /// Register that the given `validator_index` is going to produce a block at `slot`.
@ -879,7 +876,7 @@ impl<T: EthSpec> ExecutionLayer<T> {
.set_latest_forkchoice_state(forkchoice_state) .set_latest_forkchoice_state(forkchoice_state)
.await; .await;
let broadcast_results = self let broadcast_result = self
.engines() .engines()
.broadcast(|engine| async move { .broadcast(|engine| async move {
engine engine
@ -888,13 +885,13 @@ impl<T: EthSpec> ExecutionLayer<T> {
}) })
.await; .await;
process_multiple_payload_statuses( process_payload_status(
head_block_hash, head_block_hash,
Some(broadcast_results) broadcast_result.map(|response| response.payload_status),
.into_iter()
.map(|result| result.map(|response| response.payload_status)),
self.log(), self.log(),
) )
.map_err(Box::new)
.map_err(Error::EngineError)
} }
pub async fn exchange_transition_configuration(&self, spec: &ChainSpec) -> Result<(), Error> { pub async fn exchange_transition_configuration(&self, spec: &ChainSpec) -> Result<(), Error> {
@ -909,9 +906,6 @@ impl<T: EthSpec> ExecutionLayer<T> {
.broadcast(|engine| engine.api.exchange_transition_configuration_v1(local)) .broadcast(|engine| engine.api.exchange_transition_configuration_v1(local))
.await; .await;
let mut errors = vec![];
// Having no fallbacks, the id of the used node is 0
let i = 0usize;
match broadcast_result { match broadcast_result {
Ok(remote) => { Ok(remote) => {
if local.terminal_total_difficulty != remote.terminal_total_difficulty if local.terminal_total_difficulty != remote.terminal_total_difficulty
@ -922,20 +916,18 @@ impl<T: EthSpec> ExecutionLayer<T> {
"Execution client config mismatch"; "Execution client config mismatch";
"msg" => "ensure lighthouse and the execution client are up-to-date and \ "msg" => "ensure lighthouse and the execution client are up-to-date and \
configured consistently", configured consistently",
"execution_endpoint" => i,
"remote" => ?remote, "remote" => ?remote,
"local" => ?local, "local" => ?local,
); );
errors.push(EngineError::Api { Err(Error::EngineError(Box::new(EngineError::Api {
id: i.to_string(),
error: ApiError::TransitionConfigurationMismatch, error: ApiError::TransitionConfigurationMismatch,
}); })))
} else { } else {
debug!( debug!(
self.log(), self.log(),
"Execution client config is OK"; "Execution client config is OK";
"execution_endpoint" => i
); );
Ok(())
} }
} }
Err(e) => { Err(e) => {
@ -943,17 +935,10 @@ impl<T: EthSpec> ExecutionLayer<T> {
self.log(), self.log(),
"Unable to get transition config"; "Unable to get transition config";
"error" => ?e, "error" => ?e,
"execution_endpoint" => i,
); );
errors.push(e); Err(Error::EngineError(Box::new(e)))
} }
} }
if errors.is_empty() {
Ok(())
} else {
Err(Error::EngineErrors(errors))
}
} }
/// Used during block production to determine if the merge has been triggered. /// Used during block production to determine if the merge has been triggered.
@ -992,7 +977,8 @@ impl<T: EthSpec> ExecutionLayer<T> {
.await .await
}) })
.await .await
.map_err(Error::EngineErrors)?; .map_err(Box::new)
.map_err(Error::EngineError)?;
if let Some(hash) = &hash_opt { if let Some(hash) = &hash_opt {
info!( info!(
@ -1102,7 +1088,8 @@ impl<T: EthSpec> ExecutionLayer<T> {
Ok(None) Ok(None)
}) })
.await .await
.map_err(|e| Error::EngineErrors(vec![e])) .map_err(Box::new)
.map_err(Error::EngineError)
} }
/// This function should remain internal. /// This function should remain internal.
@ -1160,7 +1147,8 @@ impl<T: EthSpec> ExecutionLayer<T> {
.await .await
}) })
.await .await
.map_err(Error::EngineErrors) .map_err(Box::new)
.map_err(Error::EngineError)
} }
async fn get_payload_by_block_hash_from_engine( async fn get_payload_by_block_hash_from_engine(

View File

@ -1,7 +1,6 @@
use crate::engine_api::{Error as ApiError, PayloadStatusV1, PayloadStatusV1Status}; use crate::engine_api::{Error as ApiError, PayloadStatusV1, PayloadStatusV1Status};
use crate::engines::EngineError; use crate::engines::EngineError;
use crate::Error; use slog::{warn, Logger};
use slog::{crit, warn, Logger};
use types::ExecutionBlockHash; use types::ExecutionBlockHash;
/// Provides a simpler, easier to parse version of `PayloadStatusV1` for upstream users. /// Provides a simpler, easier to parse version of `PayloadStatusV1` for upstream users.
@ -24,38 +23,21 @@ pub enum PayloadStatus {
}, },
} }
/// Processes the responses from multiple execution engines, finding the "best" status and returning /// Processes the response from the execution engine.
/// it (if any). pub fn process_payload_status(
///
/// This function has the following basic goals:
///
/// - Detect a consensus failure between nodes.
/// - Find the most-synced node by preferring a definite response (valid/invalid) over a
/// syncing/accepted response or error.
///
/// # Details
///
/// - If there are conflicting valid/invalid responses, always return an error.
/// - If there are syncing/accepted responses but valid/invalid responses exist, return the
/// valid/invalid responses since they're definite.
/// - If there are multiple valid responses, return the first one processed.
/// - If there are multiple invalid responses, return the first one processed.
/// - Syncing/accepted responses are grouped, if there are multiple of them, return the first one
/// processed.
/// - If there are no responses (only errors or nothing), return an error.
pub fn process_multiple_payload_statuses(
head_block_hash: ExecutionBlockHash, head_block_hash: ExecutionBlockHash,
statuses: impl Iterator<Item = Result<PayloadStatusV1, EngineError>>, status: Result<PayloadStatusV1, EngineError>,
log: &Logger, log: &Logger,
) -> Result<PayloadStatus, Error> { ) -> Result<PayloadStatus, EngineError> {
let mut errors = vec![];
let mut valid_statuses = vec![];
let mut invalid_statuses = vec![];
let mut other_statuses = vec![];
for status in statuses {
match status { match status {
Err(e) => errors.push(e), Err(error) => {
warn!(
log,
"Error whilst processing payload status";
"error" => ?error,
);
Err(error)
}
Ok(response) => match &response.status { Ok(response) => match &response.status {
PayloadStatusV1Status::Valid => { PayloadStatusV1Status::Valid => {
if response if response
@ -64,35 +46,32 @@ pub fn process_multiple_payload_statuses(
{ {
// The response is only valid if `latest_valid_hash` is not `null` and // The response is only valid if `latest_valid_hash` is not `null` and
// equal to the provided `block_hash`. // equal to the provided `block_hash`.
valid_statuses.push(PayloadStatus::Valid) Ok(PayloadStatus::Valid)
} else { } else {
errors.push(EngineError::Api { let error = format!(
id: "unknown".to_string(),
error: ApiError::BadResponse(
format!(
"new_payload: response.status = VALID but invalid latest_valid_hash. Expected({:?}) Found({:?})", "new_payload: response.status = VALID but invalid latest_valid_hash. Expected({:?}) Found({:?})",
head_block_hash, head_block_hash,
response.latest_valid_hash, response.latest_valid_hash
) );
), Err(EngineError::Api {
}); error: ApiError::BadResponse(error),
})
} }
} }
PayloadStatusV1Status::Invalid => { PayloadStatusV1Status::Invalid => {
if let Some(latest_valid_hash) = response.latest_valid_hash { if let Some(latest_valid_hash) = response.latest_valid_hash {
// The response is only valid if `latest_valid_hash` is not `null`. // The response is only valid if `latest_valid_hash` is not `null`.
invalid_statuses.push(PayloadStatus::Invalid { Ok(PayloadStatus::Invalid {
latest_valid_hash, latest_valid_hash,
validation_error: response.validation_error.clone(), validation_error: response.validation_error.clone(),
}) })
} else { } else {
errors.push(EngineError::Api { Err(EngineError::Api {
id: "unknown".to_string(),
error: ApiError::BadResponse( error: ApiError::BadResponse(
"new_payload: response.status = INVALID but null latest_valid_hash" "new_payload: response.status = INVALID but null latest_valid_hash"
.to_string(), .to_string(),
), ),
}); })
} }
} }
PayloadStatusV1Status::InvalidBlockHash => { PayloadStatusV1Status::InvalidBlockHash => {
@ -107,9 +86,9 @@ pub fn process_multiple_payload_statuses(
) )
} }
invalid_statuses.push(PayloadStatus::InvalidBlockHash { Ok(PayloadStatus::InvalidBlockHash {
validation_error: response.validation_error.clone(), validation_error: response.validation_error.clone(),
}); })
} }
PayloadStatusV1Status::InvalidTerminalBlock => { PayloadStatusV1Status::InvalidTerminalBlock => {
// In the interests of being liberal with what we accept, only raise a // In the interests of being liberal with what we accept, only raise a
@ -123,9 +102,9 @@ pub fn process_multiple_payload_statuses(
) )
} }
invalid_statuses.push(PayloadStatus::InvalidTerminalBlock { Ok(PayloadStatus::InvalidTerminalBlock {
validation_error: response.validation_error.clone(), validation_error: response.validation_error.clone(),
}); })
} }
PayloadStatusV1Status::Syncing => { PayloadStatusV1Status::Syncing => {
// In the interests of being liberal with what we accept, only raise a // In the interests of being liberal with what we accept, only raise a
@ -139,7 +118,7 @@ pub fn process_multiple_payload_statuses(
) )
} }
other_statuses.push(PayloadStatus::Syncing) Ok(PayloadStatus::Syncing)
} }
PayloadStatusV1Status::Accepted => { PayloadStatusV1Status::Accepted => {
// In the interests of being liberal with what we accept, only raise a // In the interests of being liberal with what we accept, only raise a
@ -153,39 +132,8 @@ pub fn process_multiple_payload_statuses(
) )
} }
other_statuses.push(PayloadStatus::Accepted) Ok(PayloadStatus::Accepted)
} }
}, },
} }
}
if !valid_statuses.is_empty() && !invalid_statuses.is_empty() {
crit!(
log,
"Consensus failure between execution nodes";
"invalid_statuses" => ?invalid_statuses,
"valid_statuses" => ?valid_statuses,
);
// Choose to exit and ignore the valid response. This preferences correctness over
// liveness.
return Err(Error::ConsensusFailure);
}
// Log any errors to assist with troubleshooting.
for error in &errors {
warn!(
log,
"Error whilst processing payload status";
"error" => ?error,
);
}
valid_statuses
.first()
.or_else(|| invalid_statuses.first())
.or_else(|| other_statuses.first())
.cloned()
.map(Result::Ok)
.unwrap_or_else(|| Err(Error::EngineErrors(errors)))
} }