Adjust beacon node timeouts for validator client HTTP requests (#2352)

## Issue Addressed

Resolves #2313 

## Proposed Changes

Provide `BeaconNodeHttpClient` with a dedicated `Timeouts` struct.
This allows granular adjustment of the timeout duration for different calls made from the VC to the BN. Each timeout can be either a constant value or a ratio of the slot duration.

Improve timeout performance by using these adjusted timeout durations only when a fallback endpoint is available.

Add a CLI flag called `use-long-timeouts` to revert to the old behavior.

## Additional Info

Additionally, set the default `BeaconNodeHttpClient` timeouts to be the slot duration of the network, rather than a constant 12 seconds. This will allow it to adjust to different network specifications.


Co-authored-by: Paul Hauner <paul@paulhauner.com>
This commit is contained in:
Mac L 2021-07-12 01:47:48 +00:00
parent b4689e20c6
commit b3c7e59a5b
13 changed files with 234 additions and 23 deletions

View File

@ -4,7 +4,7 @@ use clap::{App, Arg, ArgMatches};
use environment::Environment;
use eth2::{
types::{GenesisData, StateId, ValidatorData, ValidatorId, ValidatorStatus},
BeaconNodeHttpClient,
BeaconNodeHttpClient, Timeouts,
};
use eth2_keystore::Keystore;
use eth2_network_config::Eth2NetworkConfig;
@ -81,6 +81,7 @@ pub fn cli_run<E: EthSpec>(matches: &ArgMatches, env: Environment<E>) -> Result<
let client = BeaconNodeHttpClient::new(
SensitiveUrl::parse(&server_url)
.map_err(|e| format!("Failed to parse beacon http server: {:?}", e))?,
Timeouts::set_all(Duration::from_secs(env.eth2_config.spec.seconds_per_slot)),
);
let testnet_config = env

View File

@ -9,7 +9,7 @@ use discv5::enr::{CombinedKey, EnrBuilder};
use environment::null_logger;
use eth2::Error;
use eth2::StatusCode;
use eth2::{types::*, BeaconNodeHttpClient};
use eth2::{types::*, BeaconNodeHttpClient, Timeouts};
use eth2_libp2p::{
rpc::methods::MetaData,
types::{EnrBitfield, SyncState},
@ -35,6 +35,7 @@ use types::{
type E = MainnetEthSpec;
const SECONDS_PER_SLOT: u64 = 12;
const SLOTS_PER_EPOCH: u64 = 32;
const VALIDATOR_COUNT: usize = SLOTS_PER_EPOCH as usize;
const CHAIN_LENGTH: u64 = SLOTS_PER_EPOCH * 5 - 1; // Make `next_block` an epoch transition
@ -213,6 +214,7 @@ impl ApiTester {
listening_socket.port()
))
.unwrap(),
Timeouts::set_all(Duration::from_secs(SECONDS_PER_SLOT)),
);
Self {
@ -327,6 +329,7 @@ impl ApiTester {
listening_socket.port()
))
.unwrap(),
Timeouts::set_all(Duration::from_secs(SECONDS_PER_SLOT)),
);
Self {

View File

@ -24,6 +24,7 @@ use serde::{de::DeserializeOwned, Serialize};
use std::convert::TryFrom;
use std::fmt;
use std::iter::Iterator;
use std::time::Duration;
#[derive(Debug)]
pub enum Error {
@ -76,12 +77,34 @@ impl fmt::Display for Error {
}
}
/// A struct to define a variety of different timeouts for different validator tasks to ensure
/// proper fallback behaviour.
///
/// Each field is the timeout passed to the HTTP requests of one family of validator duties.
/// `Debug` is derived so the configured values can be logged or used in assertions.
#[derive(Clone, Debug)]
pub struct Timeouts {
    /// Timeout for attestation-related requests: fetching attestation data and aggregate
    /// attestations, and publishing attestations and aggregates.
    pub attestation: Duration,
    /// Timeout for requesting attester duties.
    pub attester_duties: Duration,
    /// Timeout for publishing a signed block.
    pub proposal: Duration,
    /// Timeout for requesting proposer duties.
    pub proposer_duties: Duration,
}
impl Timeouts {
pub fn set_all(timeout: Duration) -> Self {
Timeouts {
attestation: timeout,
attester_duties: timeout,
proposal: timeout,
proposer_duties: timeout,
}
}
}
/// A wrapper around `reqwest::Client` which provides convenience methods for interfacing with a
/// Lighthouse Beacon Node HTTP server (`http_api`).
#[derive(Clone)]
pub struct BeaconNodeHttpClient {
// The underlying HTTP client used to issue every request.
client: reqwest::Client,
// URL of the beacon node HTTP server this client talks to.
server: SensitiveUrl,
// Timeouts passed to the individual validator-duty requests (attestation, attester duties,
// proposal, proposer duties).
timeouts: Timeouts,
}
impl fmt::Display for BeaconNodeHttpClient {
@ -97,15 +120,24 @@ impl AsRef<str> for BeaconNodeHttpClient {
}
impl BeaconNodeHttpClient {
pub fn new(server: SensitiveUrl) -> Self {
/// Create a client for `server` backed by a freshly constructed `reqwest::Client`, using
/// `timeouts` for the validator-duty requests.
pub fn new(server: SensitiveUrl, timeouts: Timeouts) -> Self {
    let client = reqwest::Client::new();
    Self {
        client,
        server,
        timeouts,
    }
}
pub fn from_components(server: SensitiveUrl, client: reqwest::Client) -> Self {
Self { client, server }
pub fn from_components(
server: SensitiveUrl,
client: reqwest::Client,
timeouts: Timeouts,
) -> Self {
Self {
client,
server,
timeouts,
}
}
/// Return the path with the standard `/eth1/v1` prefix applied.
@ -130,6 +162,26 @@ impl BeaconNodeHttpClient {
.map_err(Error::Reqwest)
}
/// Perform a HTTP GET request with a custom timeout.
async fn get_with_timeout<T: DeserializeOwned, U: IntoUrl>(
&self,
url: U,
timeout: Duration,
) -> Result<T, Error> {
let response = self
.client
.get(url)
.timeout(timeout)
.send()
.await
.map_err(Error::Reqwest)?;
ok_or_error(response)
.await?
.json()
.await
.map_err(Error::Reqwest)
}
/// Perform a HTTP GET request, returning `None` on a 404 error.
async fn get_opt<T: DeserializeOwned, U: IntoUrl>(&self, url: U) -> Result<Option<T>, Error> {
let response = self.client.get(url).send().await.map_err(Error::Reqwest)?;
@ -145,6 +197,31 @@ impl BeaconNodeHttpClient {
}
}
/// Perform a HTTP GET request with a custom timeout, returning `None` on a 404 error.
async fn get_opt_with_timeout<T: DeserializeOwned, U: IntoUrl>(
&self,
url: U,
timeout: Duration,
) -> Result<Option<T>, Error> {
let response = self
.client
.get(url)
.timeout(timeout)
.send()
.await
.map_err(Error::Reqwest)?;
match ok_or_error(response).await {
Ok(resp) => resp.json().await.map(Option::Some).map_err(Error::Reqwest),
Err(err) => {
if err.status() == Some(StatusCode::NOT_FOUND) {
Ok(None)
} else {
Err(err)
}
}
}
}
/// Perform a HTTP GET request using an 'accept' header, returning `None` on a 404 error.
pub async fn get_bytes_opt_accept_header<U: IntoUrl>(
&self,
@ -189,15 +266,36 @@ impl BeaconNodeHttpClient {
Ok(())
}
/// Perform a HTTP POST request, returning a JSON response.
async fn post_with_response<T: DeserializeOwned, U: IntoUrl, V: Serialize>(
/// Perform a HTTP POST request with a custom timeout.
async fn post_with_timeout<T: Serialize, U: IntoUrl>(
&self,
url: U,
body: &T,
timeout: Duration,
) -> Result<(), Error> {
let response = self
.client
.post(url)
.timeout(timeout)
.json(body)
.send()
.await
.map_err(Error::Reqwest)?;
ok_or_error(response).await?;
Ok(())
}
/// Perform a HTTP POST request with a custom timeout, returning a JSON response.
async fn post_with_timeout_and_response<T: DeserializeOwned, U: IntoUrl, V: Serialize>(
&self,
url: U,
body: &V,
timeout: Duration,
) -> Result<T, Error> {
let response = self
.client
.post(url)
.timeout(timeout)
.json(body)
.send()
.await
@ -468,7 +566,8 @@ impl BeaconNodeHttpClient {
.push("beacon")
.push("blocks");
self.post(path, block).await?;
self.post_with_timeout(path, block, self.timeouts.proposal)
.await?;
Ok(())
}
@ -567,6 +666,7 @@ impl BeaconNodeHttpClient {
let response = self
.client
.post(path)
.timeout(self.timeouts.attestation)
.json(attestations)
.send()
.await
@ -929,7 +1029,8 @@ impl BeaconNodeHttpClient {
.push("proposer")
.push(&epoch.to_string());
self.get(path).await
self.get_with_timeout(path, self.timeouts.proposer_duties)
.await
}
/// `GET validator/blocks/{slot}`
@ -975,10 +1076,10 @@ impl BeaconNodeHttpClient {
.append_pair("slot", &slot.to_string())
.append_pair("committee_index", &committee_index.to_string());
self.get(path).await
self.get_with_timeout(path, self.timeouts.attestation).await
}
/// `GET validator/attestation_attestation?slot,attestation_data_root`
/// `GET validator/aggregate_attestation?slot,attestation_data_root`
pub async fn get_validator_aggregate_attestation<T: EthSpec>(
&self,
slot: Slot,
@ -998,7 +1099,8 @@ impl BeaconNodeHttpClient {
&format!("{:?}", attestation_data_root),
);
self.get_opt(path).await
self.get_opt_with_timeout(path, self.timeouts.attestation)
.await
}
/// `POST validator/duties/attester/{epoch}`
@ -1016,7 +1118,8 @@ impl BeaconNodeHttpClient {
.push("attester")
.push(&epoch.to_string());
self.post_with_response(path, &indices).await
self.post_with_timeout_and_response(path, &indices, self.timeouts.attester_duties)
.await
}
/// `POST validator/aggregate_and_proofs`
@ -1034,6 +1137,7 @@ impl BeaconNodeHttpClient {
let response = self
.client
.post(path)
.timeout(self.timeouts.attestation)
.json(aggregates)
.send()
.await

View File

@ -194,6 +194,14 @@ fn init_slashing_protections_flag() {
.with_config(|config| assert!(config.init_slashing_protection));
}
// Supplying the `use-long-timeouts` flag (with no value) must result in a config whose
// `use_long_timeouts` field is `true`.
#[test]
fn use_long_timeouts_flag() {
CommandLineTest::new()
.flag("use-long-timeouts", None)
.run()
.with_config(|config| assert!(config.use_long_timeouts));
}
// Tests for Graffiti flags.
#[test]
fn graffiti_flag() {

View File

@ -4,7 +4,7 @@
use beacon_node::ProductionBeaconNode;
use environment::RuntimeContext;
use eth2::{reqwest::ClientBuilder, BeaconNodeHttpClient};
use eth2::{reqwest::ClientBuilder, BeaconNodeHttpClient, Timeouts};
use sensitive_url::SensitiveUrl;
use std::path::PathBuf;
use std::time::Duration;
@ -77,6 +77,7 @@ impl<E: EthSpec> LocalBeaconNode<E> {
Ok(BeaconNodeHttpClient::from_components(
beacon_node_url,
beacon_node_http_client,
Timeouts::set_all(HTTP_TIMEOUT),
))
}
}

View File

@ -337,6 +337,10 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
let attestation_data = self
.beacon_nodes
.first_success(RequireSynced::No, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::ATTESTATION_SERVICE_TIMES,
&[metrics::ATTESTATIONS_HTTP_GET],
);
beacon_node
.get_validator_attestation_data(slot, committee_index)
.await
@ -399,6 +403,10 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
match self
.beacon_nodes
.first_success(RequireSynced::No, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::ATTESTATION_SERVICE_TIMES,
&[metrics::ATTESTATIONS_HTTP_POST],
);
beacon_node
.post_beacon_pool_attestations(attestations_slice)
.await
@ -451,6 +459,10 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
let aggregated_attestation = self
.beacon_nodes
.first_success(RequireSynced::No, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::ATTESTATION_SERVICE_TIMES,
&[metrics::AGGREGATES_HTTP_GET],
);
beacon_node
.get_validator_aggregate_attestation(
attestation_data_ref.slot,
@ -503,6 +515,10 @@ impl<T: SlotClock + 'static, E: EthSpec> AttestationService<T, E> {
match self
.beacon_nodes
.first_success(RequireSynced::No, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::ATTESTATION_SERVICE_TIMES,
&[metrics::AGGREGATES_HTTP_POST],
);
beacon_node
.post_validator_aggregate_and_proof(signed_aggregate_and_proofs_slice)
.await

View File

@ -263,17 +263,26 @@ impl<T: SlotClock + 'static, E: EthSpec> BlockService<T, E> {
let signed_block = self
.beacon_nodes
.first_success(RequireSynced::No, |beacon_node| async move {
let get_timer = metrics::start_timer_vec(
&metrics::BLOCK_SERVICE_TIMES,
&[metrics::BEACON_BLOCK_HTTP_GET],
);
let block = beacon_node
.get_validator_blocks(slot, randao_reveal_ref, graffiti.as_ref())
.await
.map_err(|e| format!("Error from beacon node when producing block: {:?}", e))?
.data;
drop(get_timer);
let signed_block = self_ref
.validator_store
.sign_block(validator_pubkey_ref, block, current_slot)
.ok_or("Unable to sign block")?;
let _post_timer = metrics::start_timer_vec(
&metrics::BLOCK_SERVICE_TIMES,
&[metrics::BEACON_BLOCK_HTTP_POST],
);
beacon_node
.post_beacon_blocks(&signed_block)
.await

View File

@ -94,6 +94,13 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
node is not synced.",
),
)
.arg(
Arg::with_name("use-long-timeouts")
.long("use-long-timeouts")
.help("If present, the validator client will use longer timeouts for requests \
made to the beacon node. This flag is generally not recommended, \
longer timeouts can cause missed duties when fallbacks are used.")
)
// This overwrites the graffiti configured in the beacon node.
.arg(
Arg::with_name("graffiti")

View File

@ -35,6 +35,8 @@ pub struct Config {
pub disable_auto_discover: bool,
/// If true, re-register existing validators in definitions.yml for slashing protection.
pub init_slashing_protection: bool,
/// If true, use longer timeouts for requests made to the beacon node.
pub use_long_timeouts: bool,
/// Graffiti to be inserted everytime we create a block.
pub graffiti: Option<Graffiti>,
/// Graffiti file to load per validator graffitis.
@ -68,6 +70,7 @@ impl Default for Config {
allow_unsynced_beacon_node: false,
disable_auto_discover: false,
init_slashing_protection: false,
use_long_timeouts: false,
graffiti: None,
graffiti_file: None,
http_api: <_>::default(),
@ -156,6 +159,7 @@ impl Config {
config.allow_unsynced_beacon_node = cli_args.is_present("allow-unsynced");
config.disable_auto_discover = cli_args.is_present("disable-auto-discover");
config.init_slashing_protection = cli_args.is_present("init-slashing-protection");
config.use_long_timeouts = cli_args.is_present("use-long-timeouts");
if let Some(graffiti_file_path) = cli_args.value_of("graffiti-file") {
let mut graffiti_file = GraffitiFile::new(graffiti_file_path.into());

View File

@ -285,6 +285,10 @@ async fn poll_validator_indices<T: SlotClock + 'static, E: EthSpec>(
let download_result = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::VALIDATOR_ID_HTTP_GET],
);
beacon_node
.get_beacon_states_validator_id(
StateId::Head,
@ -453,6 +457,10 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
if let Err(e) = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::SUBSCRIPTIONS_HTTP_POST],
);
beacon_node
.post_validator_beacon_committee_subscriptions(subscriptions_ref)
.await
@ -509,6 +517,10 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
let response = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::ATTESTER_DUTIES_HTTP_POST],
);
beacon_node
.post_validator_duties_attester(epoch, local_indices)
.await
@ -640,6 +652,10 @@ async fn poll_beacon_proposers<T: SlotClock + 'static, E: EthSpec>(
let download_result = duties_service
.beacon_nodes
.first_success(duties_service.require_synced, |beacon_node| async move {
let _timer = metrics::start_timer_vec(
&metrics::DUTIES_SERVICE_TIMES,
&[metrics::PROPOSER_DUTIES_HTTP_GET],
);
beacon_node
.get_validator_duties_proposer(current_epoch)
.await

View File

@ -84,6 +84,7 @@ impl<E: EthSpec> ForkServiceBuilder<slot_clock::TestingSlotClock, E> {
);
let candidates = vec![CandidateBeaconNode::new(eth2::BeaconNodeHttpClient::new(
sensitive_url::SensitiveUrl::parse("http://127.0.0.1").unwrap(),
eth2::Timeouts::set_all(Duration::from_secs(12)),
))];
let mut beacon_nodes = BeaconNodeFallback::new(candidates, spec, log.clone());
beacon_nodes.set_slot_clock(slot_clock);

View File

@ -9,8 +9,14 @@ pub const SAME_DATA: &str = "same_data";
pub const UNREGISTERED: &str = "unregistered";
pub const FULL_UPDATE: &str = "full_update";
pub const BEACON_BLOCK: &str = "beacon_block";
pub const BEACON_BLOCK_HTTP_GET: &str = "beacon_block_http_get";
pub const BEACON_BLOCK_HTTP_POST: &str = "beacon_block_http_post";
pub const ATTESTATIONS: &str = "attestations";
pub const ATTESTATIONS_HTTP_GET: &str = "attestations_http_get";
pub const ATTESTATIONS_HTTP_POST: &str = "attestations_http_post";
pub const AGGREGATES: &str = "aggregates";
pub const AGGREGATES_HTTP_GET: &str = "aggregates_http_get";
pub const AGGREGATES_HTTP_POST: &str = "aggregates_http_post";
pub const CURRENT_EPOCH: &str = "current_epoch";
pub const NEXT_EPOCH: &str = "next_epoch";
pub const UPDATE_INDICES: &str = "update_indices";
@ -18,6 +24,10 @@ pub const UPDATE_ATTESTERS_CURRENT_EPOCH: &str = "update_attesters_current_epoch
pub const UPDATE_ATTESTERS_NEXT_EPOCH: &str = "update_attesters_next_epoch";
pub const UPDATE_ATTESTERS_FETCH: &str = "update_attesters_fetch";
pub const UPDATE_ATTESTERS_STORE: &str = "update_attesters_store";
pub const ATTESTER_DUTIES_HTTP_POST: &str = "attester_duties_http_post";
pub const PROPOSER_DUTIES_HTTP_GET: &str = "proposer_duties_http_get";
pub const VALIDATOR_ID_HTTP_GET: &str = "validator_id_http_get";
pub const SUBSCRIPTIONS_HTTP_POST: &str = "subscriptions_http_post";
pub const UPDATE_PROPOSERS: &str = "update_proposers";
pub const SUBSCRIPTIONS: &str = "subscriptions";

View File

@ -30,7 +30,7 @@ use clap::ArgMatches;
use duties_service::DutiesService;
use environment::RuntimeContext;
use eth2::types::StateId;
use eth2::{reqwest::ClientBuilder, BeaconNodeHttpClient, StatusCode};
use eth2::{reqwest::ClientBuilder, BeaconNodeHttpClient, StatusCode, Timeouts};
use fork_service::{ForkService, ForkServiceBuilder};
use http_api::ApiSecret;
use initialized_validators::InitializedValidators;
@ -57,8 +57,12 @@ const RETRY_DELAY: Duration = Duration::from_secs(2);
/// The time between polls when waiting for genesis.
const WAITING_FOR_GENESIS_POLL_TIME: Duration = Duration::from_secs(12);
/// The global timeout for HTTP requests to the beacon node.
const HTTP_TIMEOUT: Duration = Duration::from_secs(12);
/// Specific timeout constants for HTTP requests involved in different validator duties.
/// This can help ensure that proper endpoint fallback occurs.
///
/// Each quotient divides the slot duration to give the timeout used when the quicker,
/// fallback-friendly timeouts are selected.
///
/// Attestation requests time out after a quarter of a slot.
const HTTP_ATTESTATION_TIMEOUT_QUOTIENT: u32 = 4;
/// Attester duties requests time out after a quarter of a slot.
const HTTP_ATTESTER_DUTIES_TIMEOUT_QUOTIENT: u32 = 4;
/// Block proposal requests time out after half a slot.
const HTTP_PROPOSAL_TIMEOUT_QUOTIENT: u32 = 2;
/// Proposer duties requests time out after a quarter of a slot.
const HTTP_PROPOSER_DUTIES_TIMEOUT_QUOTIENT: u32 = 4;
#[derive(Clone)]
pub struct ProductionValidatorClient<T: EthSpec> {
@ -222,18 +226,45 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
})?;
}
let last_beacon_node_index = config
.beacon_nodes
.len()
.checked_sub(1)
.ok_or_else(|| "No beacon nodes defined.".to_string())?;
let beacon_nodes: Vec<BeaconNodeHttpClient> = config
.beacon_nodes
.clone()
.into_iter()
.map(|url| {
.iter()
.enumerate()
.map(|(i, url)| {
let slot_duration = Duration::from_secs(context.eth2_config.spec.seconds_per_slot);
let beacon_node_http_client = ClientBuilder::new()
.timeout(HTTP_TIMEOUT)
// Set default timeout to be the full slot duration.
.timeout(slot_duration)
.build()
.map_err(|e| format!("Unable to build HTTP client: {:?}", e))?;
// Use quicker timeouts if a fallback beacon node exists.
let timeouts = if i < last_beacon_node_index && !config.use_long_timeouts {
info!(
log,
"Fallback endpoints are available, using optimized timeouts.";
);
Timeouts {
attestation: slot_duration / HTTP_ATTESTATION_TIMEOUT_QUOTIENT,
attester_duties: slot_duration / HTTP_ATTESTER_DUTIES_TIMEOUT_QUOTIENT,
proposal: slot_duration / HTTP_PROPOSAL_TIMEOUT_QUOTIENT,
proposer_duties: slot_duration / HTTP_PROPOSER_DUTIES_TIMEOUT_QUOTIENT,
}
} else {
Timeouts::set_all(slot_duration)
};
Ok(BeaconNodeHttpClient::from_components(
url,
url.clone(),
beacon_node_http_client,
timeouts,
))
})
.collect::<Result<Vec<BeaconNodeHttpClient>, String>>()?;
@ -244,7 +275,7 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
.map(CandidateBeaconNode::new)
.collect();
// Set the count for beacon node fallbacks excluding the primary beacon node
// Set the count for beacon node fallbacks excluding the primary beacon node.
set_gauge(
&http_metrics::metrics::ETH2_FALLBACK_CONFIGURED,
num_nodes.saturating_sub(1) as i64,