2021-09-29 22:14:15 +00:00
|
|
|
//! Provides generic behaviour for multiple execution engines, specifically fallback behaviour.
|
|
|
|
|
2021-11-15 06:13:38 +00:00
|
|
|
use crate::engine_api::{EngineApi, Error as EngineApiError, PayloadAttributes, PayloadId};
|
2021-09-29 22:14:15 +00:00
|
|
|
use futures::future::join_all;
|
2021-11-15 06:13:38 +00:00
|
|
|
use lru::LruCache;
|
2021-10-06 13:34:17 +00:00
|
|
|
use slog::{crit, debug, info, warn, Logger};
|
2021-09-29 22:14:15 +00:00
|
|
|
use std::future::Future;
|
2021-11-15 06:13:38 +00:00
|
|
|
use tokio::sync::{Mutex, RwLock};
|
|
|
|
use types::{Address, Hash256};
|
|
|
|
|
|
|
|
/// Capacity of the payload ID LRU cache kept by each `Engine`.
///
/// Every cached value is small (~100 bytes), so a generous capacity is chosen for safety.
const PAYLOAD_ID_LRU_CACHE_SIZE: usize = 512;
|
2021-09-29 22:14:15 +00:00
|
|
|
|
|
|
|
/// Stores the remembered state of an engine.
///
/// The state is only a record of what was last observed; it is refreshed by upchecks and when
/// calls to the engine fail.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum EngineState {
    /// The engine's most recent upcheck succeeded.
    Synced,
    /// The state every engine starts in. It is also stored when a call made via
    /// `first_success` returns an error.
    Offline,
    /// The engine's most recent upcheck reported `EngineApiError::IsSyncing`.
    Syncing,
}
|
|
|
|
|
2021-10-06 10:21:21 +00:00
|
|
|
#[derive(Copy, Clone, PartialEq, Debug)]
|
2021-11-15 06:13:38 +00:00
|
|
|
pub struct ForkChoiceState {
|
2021-10-06 10:21:21 +00:00
|
|
|
pub head_block_hash: Hash256,
|
2021-11-15 06:13:38 +00:00
|
|
|
pub safe_block_hash: Hash256,
|
2021-10-06 10:21:21 +00:00
|
|
|
pub finalized_block_hash: Hash256,
|
|
|
|
}
|
2021-09-29 22:14:15 +00:00
|
|
|
|
2021-10-06 10:21:21 +00:00
|
|
|
/// Switch that turns logging on or off for some tasks.
#[derive(Clone, Copy, PartialEq)]
pub enum Logging {
    /// Log messages are emitted.
    Enabled,
    /// Log messages are suppressed.
    Disabled,
}

impl Logging {
    /// Returns `true` for `Logging::Enabled` and `false` for `Logging::Disabled`.
    pub fn is_enabled(&self) -> bool {
        matches!(self, Logging::Enabled)
    }
}
|
|
|
|
|
2021-11-15 06:13:38 +00:00
|
|
|
#[derive(Hash, PartialEq, std::cmp::Eq)]
|
|
|
|
struct PayloadIdCacheKey {
|
|
|
|
pub head_block_hash: Hash256,
|
|
|
|
pub timestamp: u64,
|
|
|
|
pub random: Hash256,
|
|
|
|
pub fee_recipient: Address,
|
|
|
|
}
|
|
|
|
|
2021-09-29 22:14:15 +00:00
|
|
|
/// An execution engine.
|
|
|
|
pub struct Engine<T> {
|
|
|
|
pub id: String,
|
|
|
|
pub api: T,
|
2021-11-15 06:13:38 +00:00
|
|
|
payload_id_cache: Mutex<LruCache<PayloadIdCacheKey, PayloadId>>,
|
2021-09-29 22:14:15 +00:00
|
|
|
state: RwLock<EngineState>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T> Engine<T> {
|
|
|
|
/// Creates a new, offline engine.
|
|
|
|
pub fn new(id: String, api: T) -> Self {
|
|
|
|
Self {
|
|
|
|
id,
|
|
|
|
api,
|
2021-11-15 06:13:38 +00:00
|
|
|
payload_id_cache: Mutex::new(LruCache::new(PAYLOAD_ID_LRU_CACHE_SIZE)),
|
2021-09-29 22:14:15 +00:00
|
|
|
state: RwLock::new(EngineState::Offline),
|
|
|
|
}
|
|
|
|
}
|
2021-11-15 06:13:38 +00:00
|
|
|
|
|
|
|
pub async fn get_payload_id(
|
|
|
|
&self,
|
|
|
|
head_block_hash: Hash256,
|
|
|
|
timestamp: u64,
|
|
|
|
random: Hash256,
|
|
|
|
fee_recipient: Address,
|
|
|
|
) -> Option<PayloadId> {
|
|
|
|
self.payload_id_cache
|
|
|
|
.lock()
|
|
|
|
.await
|
|
|
|
.get(&PayloadIdCacheKey {
|
|
|
|
head_block_hash,
|
|
|
|
timestamp,
|
|
|
|
random,
|
|
|
|
fee_recipient,
|
|
|
|
})
|
|
|
|
.cloned()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T: EngineApi> Engine<T> {
|
|
|
|
pub async fn notify_forkchoice_updated(
|
|
|
|
&self,
|
|
|
|
forkchoice_state: ForkChoiceState,
|
|
|
|
payload_attributes: Option<PayloadAttributes>,
|
|
|
|
log: &Logger,
|
|
|
|
) -> Result<Option<PayloadId>, EngineApiError> {
|
|
|
|
let response = self
|
|
|
|
.api
|
|
|
|
.forkchoice_updated_v1(forkchoice_state, payload_attributes)
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
if let Some(payload_id) = response.payload_id {
|
|
|
|
if let Some(key) =
|
|
|
|
payload_attributes.map(|pa| PayloadIdCacheKey::new(&forkchoice_state, &pa))
|
|
|
|
{
|
|
|
|
self.payload_id_cache.lock().await.put(key, payload_id);
|
|
|
|
} else {
|
|
|
|
debug!(
|
|
|
|
log,
|
|
|
|
"Engine returned unexpected payload_id";
|
|
|
|
"payload_id" => ?payload_id
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(response.payload_id)
|
|
|
|
}
|
2021-09-29 22:14:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Holds multiple execution engines and provides functionality for managing them in a fallback
|
|
|
|
/// manner.
|
|
|
|
pub struct Engines<T> {
|
|
|
|
pub engines: Vec<Engine<T>>,
|
2021-11-15 06:13:38 +00:00
|
|
|
pub latest_forkchoice_state: RwLock<Option<ForkChoiceState>>,
|
2021-09-29 22:14:15 +00:00
|
|
|
pub log: Logger,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub enum EngineError {
|
|
|
|
Offline { id: String },
|
|
|
|
Api { id: String, error: EngineApiError },
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T: EngineApi> Engines<T> {
|
2021-11-15 06:13:38 +00:00
|
|
|
/// Returns a copy of the stored fork choice state, or `None` if none has been set.
async fn get_latest_forkchoice_state(&self) -> Option<ForkChoiceState> {
    let stored = self.latest_forkchoice_state.read().await;
    *stored
}
|
|
|
|
|
|
|
|
pub async fn set_latest_forkchoice_state(&self, state: ForkChoiceState) {
|
|
|
|
*self.latest_forkchoice_state.write().await = Some(state);
|
2021-10-06 10:21:21 +00:00
|
|
|
}
|
|
|
|
|
2021-11-15 06:13:38 +00:00
|
|
|
async fn send_latest_forkchoice_state(&self, engine: &Engine<T>) {
|
|
|
|
let latest_forkchoice_state = self.get_latest_forkchoice_state().await;
|
|
|
|
|
|
|
|
if let Some(forkchoice_state) = latest_forkchoice_state {
|
2021-10-06 10:21:21 +00:00
|
|
|
info!(
|
|
|
|
self.log,
|
|
|
|
"Issuing forkchoiceUpdated";
|
2021-11-15 06:13:38 +00:00
|
|
|
"forkchoice_state" => ?forkchoice_state,
|
2021-10-06 10:21:21 +00:00
|
|
|
"id" => &engine.id,
|
|
|
|
);
|
|
|
|
|
2021-11-15 06:13:38 +00:00
|
|
|
// For simplicity, payload attributes are never included in this call. It may be
|
|
|
|
// reasonable to include them in the future.
|
2021-10-06 10:21:21 +00:00
|
|
|
if let Err(e) = engine
|
|
|
|
.api
|
2021-11-15 06:13:38 +00:00
|
|
|
.forkchoice_updated_v1(forkchoice_state, None)
|
2021-10-06 10:21:21 +00:00
|
|
|
.await
|
|
|
|
{
|
2021-10-06 13:34:17 +00:00
|
|
|
debug!(
|
2021-10-06 10:21:21 +00:00
|
|
|
self.log,
|
|
|
|
"Failed to issue latest head to engine";
|
|
|
|
"error" => ?e,
|
|
|
|
"id" => &engine.id,
|
|
|
|
);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
debug!(
|
|
|
|
self.log,
|
|
|
|
"No head, not sending to engine";
|
|
|
|
"id" => &engine.id,
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns `true` if there is at least one engine with a "synced" status.
|
|
|
|
pub async fn any_synced(&self) -> bool {
|
|
|
|
for engine in &self.engines {
|
|
|
|
if *engine.state.read().await == EngineState::Synced {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}
|
|
|
|
|
2021-09-29 22:14:15 +00:00
|
|
|
/// Run the `EngineApi::upcheck` function on all nodes which are currently offline.
|
|
|
|
///
|
|
|
|
/// This can be used to try and recover any offline nodes.
|
2021-10-06 10:21:21 +00:00
|
|
|
pub async fn upcheck_not_synced(&self, logging: Logging) {
|
2021-09-29 22:14:15 +00:00
|
|
|
let upcheck_futures = self.engines.iter().map(|engine| async move {
|
2021-10-06 10:21:21 +00:00
|
|
|
let mut state_lock = engine.state.write().await;
|
|
|
|
if *state_lock != EngineState::Synced {
|
2021-09-29 22:14:15 +00:00
|
|
|
match engine.api.upcheck().await {
|
|
|
|
Ok(()) => {
|
2021-10-06 10:21:21 +00:00
|
|
|
if logging.is_enabled() {
|
|
|
|
info!(
|
|
|
|
self.log,
|
|
|
|
"Execution engine online";
|
|
|
|
"id" => &engine.id
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2021-11-15 06:13:38 +00:00
|
|
|
// Send the node our latest forkchoice_state.
|
|
|
|
self.send_latest_forkchoice_state(engine).await;
|
2021-10-06 10:21:21 +00:00
|
|
|
|
|
|
|
*state_lock = EngineState::Synced
|
|
|
|
}
|
|
|
|
Err(EngineApiError::IsSyncing) => {
|
|
|
|
if logging.is_enabled() {
|
|
|
|
warn!(
|
|
|
|
self.log,
|
|
|
|
"Execution engine syncing";
|
|
|
|
"id" => &engine.id
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2021-11-15 06:13:38 +00:00
|
|
|
// Send the node our latest forkchoice_state, it may assist with syncing.
|
|
|
|
self.send_latest_forkchoice_state(engine).await;
|
2021-10-06 10:21:21 +00:00
|
|
|
|
|
|
|
*state_lock = EngineState::Syncing
|
2021-09-29 22:14:15 +00:00
|
|
|
}
|
|
|
|
Err(e) => {
|
2021-10-06 10:21:21 +00:00
|
|
|
if logging.is_enabled() {
|
|
|
|
warn!(
|
|
|
|
self.log,
|
|
|
|
"Execution engine offline";
|
|
|
|
"error" => ?e,
|
|
|
|
"id" => &engine.id
|
|
|
|
)
|
|
|
|
}
|
2021-09-29 22:14:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-10-06 10:21:21 +00:00
|
|
|
*state_lock
|
2021-09-29 22:14:15 +00:00
|
|
|
});
|
|
|
|
|
2021-10-06 10:21:21 +00:00
|
|
|
let num_synced = join_all(upcheck_futures)
|
2021-09-29 22:14:15 +00:00
|
|
|
.await
|
|
|
|
.into_iter()
|
2021-10-06 10:21:21 +00:00
|
|
|
.filter(|state: &EngineState| *state == EngineState::Synced)
|
2021-09-29 22:14:15 +00:00
|
|
|
.count();
|
|
|
|
|
2021-10-06 10:21:21 +00:00
|
|
|
if num_synced == 0 && logging.is_enabled() {
|
2021-09-29 22:14:15 +00:00
|
|
|
crit!(
|
|
|
|
self.log,
|
2021-10-06 10:21:21 +00:00
|
|
|
"No synced execution engines";
|
2021-09-29 22:14:15 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Run `func` on all engines, in the order in which they are defined, returning the first
|
|
|
|
/// successful result that is found.
|
|
|
|
///
|
|
|
|
/// This function might try to run `func` twice. If all nodes return an error on the first time
|
|
|
|
/// it runs, it will try to upcheck all offline nodes and then run the function again.
|
|
|
|
pub async fn first_success<'a, F, G, H>(&'a self, func: F) -> Result<H, Vec<EngineError>>
|
|
|
|
where
|
|
|
|
F: Fn(&'a Engine<T>) -> G + Copy,
|
|
|
|
G: Future<Output = Result<H, EngineApiError>>,
|
|
|
|
{
|
|
|
|
match self.first_success_without_retry(func).await {
|
|
|
|
Ok(result) => Ok(result),
|
|
|
|
Err(mut first_errors) => {
|
|
|
|
// Try to recover some nodes.
|
2021-10-06 10:21:21 +00:00
|
|
|
self.upcheck_not_synced(Logging::Enabled).await;
|
2021-09-29 22:14:15 +00:00
|
|
|
// Retry the call on all nodes.
|
|
|
|
match self.first_success_without_retry(func).await {
|
|
|
|
Ok(result) => Ok(result),
|
|
|
|
Err(second_errors) => {
|
|
|
|
first_errors.extend(second_errors);
|
|
|
|
Err(first_errors)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Run `func` on all engines, in the order in which they are defined, returning the first
|
|
|
|
/// successful result that is found.
|
|
|
|
async fn first_success_without_retry<'a, F, G, H>(
|
|
|
|
&'a self,
|
|
|
|
func: F,
|
|
|
|
) -> Result<H, Vec<EngineError>>
|
|
|
|
where
|
|
|
|
F: Fn(&'a Engine<T>) -> G,
|
|
|
|
G: Future<Output = Result<H, EngineApiError>>,
|
|
|
|
{
|
|
|
|
let mut errors = vec![];
|
|
|
|
|
|
|
|
for engine in &self.engines {
|
2021-10-06 10:21:21 +00:00
|
|
|
let engine_synced = *engine.state.read().await == EngineState::Synced;
|
|
|
|
if engine_synced {
|
2021-09-29 22:14:15 +00:00
|
|
|
match func(engine).await {
|
|
|
|
Ok(result) => return Ok(result),
|
|
|
|
Err(error) => {
|
2021-10-06 13:34:17 +00:00
|
|
|
debug!(
|
2021-09-29 22:14:15 +00:00
|
|
|
self.log,
|
|
|
|
"Execution engine call failed";
|
|
|
|
"error" => ?error,
|
|
|
|
"id" => &engine.id
|
|
|
|
);
|
2021-10-06 10:21:21 +00:00
|
|
|
*engine.state.write().await = EngineState::Offline;
|
2021-09-29 22:14:15 +00:00
|
|
|
errors.push(EngineError::Api {
|
|
|
|
id: engine.id.clone(),
|
|
|
|
error,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
errors.push(EngineError::Offline {
|
|
|
|
id: engine.id.clone(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Err(errors)
|
|
|
|
}
|
|
|
|
|
2021-10-06 10:21:21 +00:00
|
|
|
/// Runs `func` on all nodes concurrently, returning all results. Any nodes that are offline
|
|
|
|
/// will be ignored, however all synced or unsynced nodes will receive the broadcast.
|
2021-09-29 22:14:15 +00:00
|
|
|
///
|
|
|
|
/// This function might try to run `func` twice. If all nodes return an error on the first time
|
|
|
|
/// it runs, it will try to upcheck all offline nodes and then run the function again.
|
|
|
|
pub async fn broadcast<'a, F, G, H>(&'a self, func: F) -> Vec<Result<H, EngineError>>
|
|
|
|
where
|
|
|
|
F: Fn(&'a Engine<T>) -> G + Copy,
|
|
|
|
G: Future<Output = Result<H, EngineApiError>>,
|
|
|
|
{
|
|
|
|
let first_results = self.broadcast_without_retry(func).await;
|
|
|
|
|
|
|
|
let mut any_offline = false;
|
|
|
|
for result in &first_results {
|
|
|
|
match result {
|
|
|
|
Ok(_) => return first_results,
|
|
|
|
Err(EngineError::Offline { .. }) => any_offline = true,
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if any_offline {
|
2021-10-06 10:21:21 +00:00
|
|
|
self.upcheck_not_synced(Logging::Enabled).await;
|
2021-09-29 22:14:15 +00:00
|
|
|
self.broadcast_without_retry(func).await
|
|
|
|
} else {
|
|
|
|
first_results
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Runs `func` on all nodes concurrently, returning all results.
|
|
|
|
pub async fn broadcast_without_retry<'a, F, G, H>(
|
|
|
|
&'a self,
|
|
|
|
func: F,
|
|
|
|
) -> Vec<Result<H, EngineError>>
|
|
|
|
where
|
|
|
|
F: Fn(&'a Engine<T>) -> G,
|
|
|
|
G: Future<Output = Result<H, EngineApiError>>,
|
|
|
|
{
|
|
|
|
let func = &func;
|
|
|
|
let futures = self.engines.iter().map(|engine| async move {
|
2021-10-06 10:21:21 +00:00
|
|
|
let is_offline = *engine.state.read().await == EngineState::Offline;
|
|
|
|
if !is_offline {
|
2021-09-29 22:14:15 +00:00
|
|
|
func(engine).await.map_err(|error| {
|
2021-10-06 13:34:17 +00:00
|
|
|
debug!(
|
2021-09-29 22:14:15 +00:00
|
|
|
self.log,
|
|
|
|
"Execution engine call failed";
|
|
|
|
"error" => ?error,
|
|
|
|
"id" => &engine.id
|
|
|
|
);
|
|
|
|
EngineError::Api {
|
|
|
|
id: engine.id.clone(),
|
|
|
|
error,
|
|
|
|
}
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
Err(EngineError::Offline {
|
|
|
|
id: engine.id.clone(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
join_all(futures).await
|
|
|
|
}
|
|
|
|
}
|
2021-11-15 06:13:38 +00:00
|
|
|
|
|
|
|
impl PayloadIdCacheKey {
|
|
|
|
fn new(state: &ForkChoiceState, attributes: &PayloadAttributes) -> Self {
|
|
|
|
Self {
|
|
|
|
head_block_hash: state.head_block_hash,
|
|
|
|
timestamp: attributes.timestamp,
|
|
|
|
random: attributes.random,
|
|
|
|
fee_recipient: attributes.fee_recipient,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|