Initial work to remove engines fallback from the execution_layer crate (#3257)

## Issue Addressed

Part of #3160 

## Proposed Changes
Use only the first URL given for the execution engine; if more than one is provided, log a warning.
This change only collapses the multiple-engine handling down to a single engine. The amount of code cleanup that can and should follow is not small and would interfere with ongoing PRs, so the changes here are intentionally kept very minimal.
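As a rough illustration of the intended behaviour, here is a minimal sketch with simplified, hypothetical types (`select_execution_url` is an illustrative helper, not the actual Lighthouse code):

```rust
// Minimal sketch of the endpoint-selection behaviour described above.
// `select_execution_url` is a hypothetical helper, not the Lighthouse API.
fn select_execution_url(urls: Vec<String>) -> Result<String, String> {
    if urls.len() > 1 {
        // Extra endpoints are ignored; the operator is only warned.
        eprintln!("WARN: multiple execution endpoints given; only the first will be used");
    }
    // Still fail hard if no endpoint was configured at all.
    urls.into_iter()
        .next()
        .ok_or_else(|| "no execution endpoints provided".to_string())
}

fn main() {
    let urls = vec![
        "http://localhost:8551".to_string(),
        "http://backup:8551".to_string(),
    ];
    println!("{:?}", select_execution_url(urls));
}
```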

## Additional Info

Future work:
- In [`EngineError`](https://github.com/sigp/lighthouse/blob/9c429d0764/beacon_node/execution_layer/src/engines.rs#L173-L177) the `id` field is no longer needed, since it now has no meaning (see the sketch after this list).
- The [`first_success_without_retry`](https://github.com/sigp/lighthouse/blob/9c429d0764/beacon_node/execution_layer/src/engines.rs#L348-L351) function can return a single error.
- The [`first_success`](https://github.com/sigp/lighthouse/blob/9c429d0764/beacon_node/execution_layer/src/engines.rs#L324) function can return a single error.
- After the redundancy is removed for the builders, we can probably make [`EngineErrors`](https://github.com/sigp/lighthouse/blob/9c429d0764/beacon_node/execution_layer/src/lib.rs#L69) carry a single error.
- Merge the [`Engines`](https://github.com/sigp/lighthouse/blob/9c429d0764/beacon_node/execution_layer/src/engines.rs#L161-L165) struct and [`Engine`](https://github.com/sigp/lighthouse/blob/9c429d0764/beacon_node/execution_layer/src/engines.rs#L62-L67).
- Fix the associated configuration and CLI params. Not sure whether both are addressed in https://github.com/sigp/lighthouse/pull/3214.

In general I think those changes can be done incrementally and in individual pull requests.
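As a rough sketch of where the error types could end up once that cleanup is done (illustrative shapes only, not the current Lighthouse definitions):

```rust
// Hypothetical sketch of the error-type simplification listed above.
// Names and shapes are illustrative only, not the current Lighthouse code.

#[derive(Debug)]
enum EngineApiError {
    Timeout,
    BadResponse(String),
}

// With a single engine, the `id` field loses its meaning and can be dropped.
#[derive(Debug)]
enum EngineError {
    Offline,
    Auth,
    Api { error: EngineApiError },
}

// The outer `EngineErrors(Vec<EngineError>)` variant could then carry one error.
#[derive(Debug)]
enum Error {
    NoEngines,
    EngineError(EngineError),
}

fn main() {
    let e = Error::EngineError(EngineError::Api {
        error: EngineApiError::Timeout,
    });
    println!("{:?}", e);
}
```

Collapsing `EngineErrors(Vec<EngineError>)` into a single error would also simplify the call sites that currently have to allocate a one-element `Vec`, as this diff does with `Error::EngineErrors(vec![e])`.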
Author: Divma
Date: 2022-06-22 14:27:16 +00:00
Parent: 8faaa35b58
Commit: 2063c0fa0d
2 changed files with 201 additions and 285 deletions

File: beacon_node/execution_layer/src/engines.rs

@@ -156,10 +156,11 @@ impl Builder for Engine<BuilderApi> {
     }
 }
 
-/// Holds multiple execution engines and provides functionality for managing them in a fallback
-/// manner.
+// This structure used to hold multiple execution engines managed in a fallback manner. This
+// functionality has been removed following https://github.com/sigp/lighthouse/issues/3118 and this
+// struct will likely be removed in the future.
 pub struct Engines {
-    pub engines: Vec<Engine<EngineApi>>,
+    pub engine: Engine<EngineApi>,
     pub latest_forkchoice_state: RwLock<Option<ForkChoiceState>>,
     pub log: Logger,
 }
@@ -185,7 +186,7 @@ impl Engines {
         *self.latest_forkchoice_state.write().await = Some(state);
     }
 
-    async fn send_latest_forkchoice_state(&self, engine: &Engine<EngineApi>) {
+    async fn send_latest_forkchoice_state(&self) {
         let latest_forkchoice_state = self.get_latest_forkchoice_state().await;
 
         if let Some(forkchoice_state) = latest_forkchoice_state {
@@ -194,7 +195,7 @@ impl Engines {
                     self.log,
                     "No need to call forkchoiceUpdated";
                     "msg" => "head does not have execution enabled",
-                    "id" => &engine.id,
+                    "id" => &self.engine.id,
                 );
                 return;
             }
@@ -203,12 +204,13 @@ impl Engines {
                 self.log,
                 "Issuing forkchoiceUpdated";
                 "forkchoice_state" => ?forkchoice_state,
-                "id" => &engine.id,
+                "id" => &self.engine.id,
             );
 
             // For simplicity, payload attributes are never included in this call. It may be
             // reasonable to include them in the future.
-            if let Err(e) = engine
+            if let Err(e) = self
+                .engine
                 .api
                 .forkchoice_updated_v1(forkchoice_state, None)
                 .await
@@ -217,47 +219,37 @@ impl Engines {
                     self.log,
                     "Failed to issue latest head to engine";
                     "error" => ?e,
-                    "id" => &engine.id,
+                    "id" => &self.engine.id,
                 );
             }
         } else {
             debug!(
                 self.log,
                 "No head, not sending to engine";
-                "id" => &engine.id,
+                "id" => &self.engine.id,
             );
         }
     }
 
-    /// Returns `true` if there is at least one engine with a "synced" status.
-    pub async fn any_synced(&self) -> bool {
-        for engine in &self.engines {
-            if *engine.state.read().await == EngineState::Synced {
-                return true;
-            }
-        }
-        false
-    }
-
-    /// Run the `EngineApi::upcheck` function on all nodes which are currently offline.
-    ///
-    /// This can be used to try and recover any offline nodes.
+    /// Returns `true` if the engine has a "synced" status.
+    pub async fn is_synced(&self) -> bool {
+        *self.engine.state.read().await == EngineState::Synced
+    }
+
+    /// Run the `EngineApi::upcheck` function if the node's last known state is not synced. This
+    /// might be used to recover the node if offline.
     pub async fn upcheck_not_synced(&self, logging: Logging) {
-        let upcheck_futures = self.engines.iter().map(|engine| async move {
-            let mut state_lock = engine.state.write().await;
+        let mut state_lock = self.engine.state.write().await;
         if *state_lock != EngineState::Synced {
-            match engine.api.upcheck().await {
+            match self.engine.api.upcheck().await {
                 Ok(()) => {
                     if logging.is_enabled() {
                         info!(
                             self.log,
                             "Execution engine online";
-                            "id" => &engine.id
                         );
                     }
 
                     // Send the node our latest forkchoice_state.
-                    self.send_latest_forkchoice_state(engine).await;
+                    self.send_latest_forkchoice_state().await;
 
                     *state_lock = EngineState::Synced
                 }
@@ -266,12 +258,11 @@ impl Engines {
                         warn!(
                             self.log,
                             "Execution engine syncing";
-                            "id" => &engine.id
                         )
                     }
 
                     // Send the node our latest forkchoice_state, it may assist with syncing.
-                    self.send_latest_forkchoice_state(engine).await;
+                    self.send_latest_forkchoice_state().await;
 
                     *state_lock = EngineState::Syncing
                 }
@@ -281,7 +272,6 @@ impl Engines {
                             self.log,
                             "Failed jwt authorization";
                             "error" => ?err,
-                            "id" => &engine.id
                         );
                     }
@@ -293,22 +283,13 @@ impl Engines {
                             self.log,
                             "Execution engine offline";
                             "error" => ?e,
-                            "id" => &engine.id
                         )
                     }
                 }
             }
         }
-            *state_lock
-        });
-
-        let num_synced = join_all(upcheck_futures)
-            .await
-            .into_iter()
-            .filter(|state: &EngineState| *state == EngineState::Synced)
-            .count();
 
-        if num_synced == 0 && logging.is_enabled() {
+        if *state_lock != EngineState::Synced && logging.is_enabled() {
            crit!(
                self.log,
                "No synced execution engines";
@@ -355,111 +336,89 @@ impl Engines {
    {
        let mut errors = vec![];

-        for engine in &self.engines {
        let (engine_synced, engine_auth_failed) = {
-            let state = engine.state.read().await;
+            let state = self.engine.state.read().await;
            (
                *state == EngineState::Synced,
                *state == EngineState::AuthFailed,
            )
        };
        if engine_synced {
-            match func(engine).await {
+            match func(&self.engine).await {
                Ok(result) => return Ok(result),
                Err(error) => {
                    debug!(
                        self.log,
                        "Execution engine call failed";
                        "error" => ?error,
-                        "id" => &engine.id
+                        "id" => &&self.engine.id
                    );
-                    *engine.state.write().await = EngineState::Offline;
+                    *self.engine.state.write().await = EngineState::Offline;
                    errors.push(EngineError::Api {
-                        id: engine.id.clone(),
+                        id: self.engine.id.clone(),
                        error,
                    })
                }
            }
        } else if engine_auth_failed {
            errors.push(EngineError::Auth {
-                id: engine.id.clone(),
+                id: self.engine.id.clone(),
            })
        } else {
            errors.push(EngineError::Offline {
-                id: engine.id.clone(),
+                id: self.engine.id.clone(),
            })
        }
-        }
        Err(errors)
    }

-    /// Runs `func` on all nodes concurrently, returning all results. Any nodes that are offline
-    /// will be ignored, however all synced or unsynced nodes will receive the broadcast.
+    /// Runs `func` on the node.
    ///
    /// This function might try to run `func` twice. If all nodes return an error on the first time
    /// it runs, it will try to upcheck all offline nodes and then run the function again.
-    pub async fn broadcast<'a, F, G, H>(&'a self, func: F) -> Vec<Result<H, EngineError>>
+    pub async fn broadcast<'a, F, G, H>(&'a self, func: F) -> Result<H, EngineError>
    where
        F: Fn(&'a Engine<EngineApi>) -> G + Copy,
        G: Future<Output = Result<H, EngineApiError>>,
    {
-        let first_results = self.broadcast_without_retry(func).await;
-
-        let mut any_offline = false;
-        for result in &first_results {
-            match result {
-                Ok(_) => return first_results,
-                Err(EngineError::Offline { .. }) => any_offline = true,
-                _ => (),
-            }
-        }
-
-        if any_offline {
+        match self.broadcast_without_retry(func).await {
+            Err(EngineError::Offline { .. }) => {
                self.upcheck_not_synced(Logging::Enabled).await;
                self.broadcast_without_retry(func).await
-        } else {
-            first_results
+            }
+            other => other,
        }
    }

-    /// Runs `func` on all nodes concurrently, returning all results.
-    pub async fn broadcast_without_retry<'a, F, G, H>(
-        &'a self,
-        func: F,
-    ) -> Vec<Result<H, EngineError>>
+    /// Runs `func` on the node if it's last state is not offline.
+    pub async fn broadcast_without_retry<'a, F, G, H>(&'a self, func: F) -> Result<H, EngineError>
    where
        F: Fn(&'a Engine<EngineApi>) -> G,
        G: Future<Output = Result<H, EngineApiError>>,
    {
        let func = &func;
-        let futures = self.engines.iter().map(|engine| async move {
-            let is_offline = *engine.state.read().await == EngineState::Offline;
-            if !is_offline {
-                match func(engine).await {
+        if *self.engine.state.read().await == EngineState::Offline {
+            Err(EngineError::Offline {
+                id: self.engine.id.clone(),
+            })
+        } else {
+            match func(&self.engine).await {
                Ok(res) => Ok(res),
                Err(error) => {
                    debug!(
                        self.log,
                        "Execution engine call failed";
                        "error" => ?error,
-                        "id" => &engine.id
                    );
-                    *engine.state.write().await = EngineState::Offline;
+                    *self.engine.state.write().await = EngineState::Offline;
                    Err(EngineError::Api {
-                        id: engine.id.clone(),
+                        id: self.engine.id.clone(),
                        error,
                    })
                }
            }
-            } else {
-                Err(EngineError::Offline {
-                    id: engine.id.clone(),
-                })
-            }
        }
-        });
-
-        join_all(futures).await
    }
}
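For context on how this changes call sites, here is a minimal sketch of the difference between the old per-engine `Vec<Result<_, EngineError>>` returned by `broadcast` and the new single `Result`. The types below (`EngineError`, `handle_old`, `handle_new`) are simplified stand-ins, not the Lighthouse code:

```rust
// Illustrative only: simplified stand-ins showing how a caller's handling
// changes when `broadcast` stops returning one result per engine.
#[derive(Debug)]
enum EngineError {
    Offline,
}

// Old shape: one result per configured engine, aggregated by the caller.
fn handle_old(results: Vec<Result<u64, EngineError>>) -> Result<u64, Vec<EngineError>> {
    let mut errors = vec![];
    for result in results {
        match result {
            Ok(value) => return Ok(value),
            Err(e) => errors.push(e),
        }
    }
    Err(errors)
}

// New shape: a single result, so the aggregation disappears.
fn handle_new(result: Result<u64, EngineError>) -> Result<u64, Vec<EngineError>> {
    result.map_err(|e| vec![e])
}

fn main() {
    println!("{:?}", handle_old(vec![Err(EngineError::Offline), Ok(7)]));
    println!("{:?}", handle_new(Ok(7)));
}
```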

File: beacon_node/execution_layer/src/lib.rs

@@ -158,72 +158,60 @@ impl ExecutionLayer {
         let Config {
             execution_endpoints: urls,
             builder_endpoints: builder_urls,
-            mut secret_files,
+            secret_files,
             suggested_fee_recipient,
             jwt_id,
             jwt_version,
             default_datadir,
         } = config;
 
-        if urls.is_empty() {
-            return Err(Error::NoEngines);
+        if urls.len() > 1 {
+            warn!(log, "Only the first execution engine url will be used");
         }
+        let execution_url = urls.into_iter().next().ok_or(Error::NoEngines)?;
 
-        // Extend the jwt secret files with the default jwt secret path if not provided via cli.
-        // This ensures that we have a jwt secret for every EL.
-        secret_files.extend(vec![
-            default_datadir.join(DEFAULT_JWT_FILE);
-            urls.len().saturating_sub(secret_files.len())
-        ]);
-
-        let secrets: Vec<(JwtKey, PathBuf)> = secret_files
-            .iter()
-            .map(|p| {
+        // Use the default jwt secret path if not provided via cli.
+        let secret_file = secret_files
+            .into_iter()
+            .next()
+            .unwrap_or_else(|| default_datadir.join(DEFAULT_JWT_FILE));
+
+        let jwt_key = if secret_file.exists() {
             // Read secret from file if it already exists
-            if p.exists() {
-                std::fs::read_to_string(p)
-                    .map_err(|e| {
-                        format!("Failed to read JWT secret file {:?}, error: {:?}", p, e)
-                    })
+            std::fs::read_to_string(&secret_file)
+                .map_err(|e| format!("Failed to read JWT secret file. Error: {:?}", e))
                 .and_then(|ref s| {
                     let secret = JwtKey::from_slice(
                         &hex::decode(strip_prefix(s.trim_end()))
                             .map_err(|e| format!("Invalid hex string: {:?}", e))?,
                     )?;
-                    Ok((secret, p.to_path_buf()))
+                    Ok(secret)
                 })
+                .map_err(Error::InvalidJWTSecret)
         } else {
             // Create a new file and write a randomly generated secret to it if file does not exist
             std::fs::File::options()
                 .write(true)
                 .create_new(true)
-                .open(p)
-                .map_err(|e| {
-                    format!("Failed to open JWT secret file {:?}, error: {:?}", p, e)
-                })
+                .open(&secret_file)
+                .map_err(|e| format!("Failed to open JWT secret file. Error: {:?}", e))
                 .and_then(|mut f| {
                     let secret = auth::JwtKey::random();
-                    f.write_all(secret.hex_string().as_bytes()).map_err(|e| {
-                        format!("Failed to write to JWT secret file: {:?}", e)
-                    })?;
-                    Ok((secret, p.to_path_buf()))
+                    f.write_all(secret.hex_string().as_bytes())
+                        .map_err(|e| format!("Failed to write to JWT secret file: {:?}", e))?;
+                    Ok(secret)
                 })
-            }
-        })
-        .collect::<Result<_, _>>()
-        .map_err(Error::InvalidJWTSecret)?;
+                .map_err(Error::InvalidJWTSecret)
+        }?;
 
-        let engines: Vec<Engine<EngineApi>> = urls
-            .into_iter()
-            .zip(secrets.into_iter())
-            .map(|(url, (secret, path))| {
-                let id = url.to_string();
-                let auth = Auth::new(secret, jwt_id.clone(), jwt_version.clone());
-                debug!(log, "Loaded execution endpoint"; "endpoint" => %id, "jwt_path" => ?path);
-                let api = HttpJsonRpc::<EngineApi>::new_with_auth(url, auth)?;
-                Ok(Engine::<EngineApi>::new(id, api))
-            })
-            .collect::<Result<_, ApiError>>()?;
+        let engine: Engine<EngineApi> = {
+            let id = execution_url.to_string();
+            let auth = Auth::new(jwt_key, jwt_id, jwt_version);
+            debug!(log, "Loaded execution endpoint"; "endpoint" => %id, "jwt_path" => ?secret_file.as_path());
+            let api = HttpJsonRpc::<EngineApi>::new_with_auth(execution_url, auth)
+                .map_err(Error::ApiError)?;
+            Engine::<EngineApi>::new(id, api)
+        };
 
         let builders: Vec<Engine<BuilderApi>> = builder_urls
             .into_iter()
@@ -236,7 +224,7 @@ impl ExecutionLayer {
         let inner = Inner {
             engines: Engines {
-                engines,
+                engine,
                 latest_forkchoice_state: <_>::default(),
                 log: log.clone(),
             },
@@ -455,7 +443,7 @@ impl ExecutionLayer {
     /// Returns `true` if there is at least one synced and reachable engine.
     pub async fn is_synced(&self) -> bool {
-        self.engines().any_synced().await
+        self.engines().is_synced().await
     }
 
     /// Updates the proposer preparation data provided by validators
@@ -750,7 +738,7 @@ impl ExecutionLayer {
         process_multiple_payload_statuses(
             execution_payload.block_hash,
-            broadcast_results.into_iter(),
+            Some(broadcast_results).into_iter(),
             self.log(),
         )
     }
@@ -903,7 +891,7 @@ impl ExecutionLayer {
         };
         process_multiple_payload_statuses(
             head_block_hash,
-            broadcast_results
+            Some(broadcast_results)
                 .into_iter()
                 .chain(builder_broadcast_results.into_iter())
                 .map(|result| result.map(|response| response.payload_status)),
@@ -918,14 +906,15 @@ impl ExecutionLayer {
             terminal_block_number: 0,
         };
 
-        let broadcast_results = self
+        let broadcast_result = self
             .engines()
             .broadcast(|engine| engine.api.exchange_transition_configuration_v1(local))
             .await;
 
         let mut errors = vec![];
-        for (i, result) in broadcast_results.into_iter().enumerate() {
-            match result {
+        // Having no fallbacks, the id of the used node is 0
+        let i = 0usize;
+        match broadcast_result {
             Ok(remote) => {
                 if local.terminal_total_difficulty != remote.terminal_total_difficulty
                     || local.terminal_block_hash != remote.terminal_block_hash
@@ -961,7 +950,6 @@ impl ExecutionLayer {
                 errors.push(e);
             }
         }
-        }
 
         if errors.is_empty() {
             Ok(())
@@ -1102,8 +1090,7 @@ impl ExecutionLayer {
             &[metrics::IS_VALID_TERMINAL_POW_BLOCK_HASH],
         );
 
-        let broadcast_results = self
-            .engines()
+        self.engines()
             .broadcast(|engine| async move {
                 if let Some(pow_block) = self.get_pow_block(engine, block_hash).await? {
                     if let Some(pow_parent) =
@@ -1116,38 +1103,8 @@ impl ExecutionLayer {
                 }
                 Ok(None)
             })
-            .await;
-
-        let mut errors = vec![];
-        let mut terminal = 0;
-        let mut not_terminal = 0;
-        let mut block_missing = 0;
-        for result in broadcast_results {
-            match result {
-                Ok(Some(true)) => terminal += 1,
-                Ok(Some(false)) => not_terminal += 1,
-                Ok(None) => block_missing += 1,
-                Err(e) => errors.push(e),
-            }
-        }
-
-        if terminal > 0 && not_terminal > 0 {
-            crit!(
-                self.log(),
-                "Consensus failure between execution nodes";
-                "method" => "is_valid_terminal_pow_block_hash"
-            );
-        }
-
-        if terminal > 0 {
-            Ok(Some(true))
-        } else if not_terminal > 0 {
-            Ok(Some(false))
-        } else if block_missing > 0 {
-            Ok(None)
-        } else {
-            Err(Error::EngineErrors(errors))
-        }
+            .await
+            .map_err(|e| Error::EngineErrors(vec![e]))
     }
 
     /// This function should remain internal.