From 7456e1e8faac7a581705f8e71b0dc4f09a36ee5c Mon Sep 17 00:00:00 2001 From: Age Manning Date: Wed, 26 Apr 2023 01:12:36 +0000 Subject: [PATCH] Separate BN for block proposals (#4182) It is a well-known fact that IP addresses for beacon nodes used by specific validators can be de-anonymized. There is an assumed risk that a malicious user may attempt to DoS validators when producing blocks to prevent chain growth/liveness. Although there are a number of ideas put forward to address this, there are a few simple approaches we can take to mitigate this risk. Currently, a Lighthouse user is able to set a number of beacon-nodes that their validator client can connect to. If one beacon node is taken offline, it can fall back to another. Different beacon nodes can use VPNs or rotate IPs in order to mask their IPs. This PR provides an additional setup option which further mitigates attacks of this kind. This PR introduces a CLI flag --proposer-only to the beacon node. Setting this flag will configure the beacon node to run with minimal peers and crucially will not subscribe to subnets or sync committees. Therefore nodes of this kind should not be identified as nodes connected to validators of any kind. It also introduces a CLI flag --proposer-nodes to the validator client. Users can then provide a number of beacon nodes (which may or may not run the --proposer-only flag) that the validator client will use for block production and propagation only. If these nodes fail, the validator client will fall back to the default list of beacon nodes. Users are then able to set up a number of beacon nodes dedicated to block proposals (which are unlikely to be identified as validator nodes) and point their validator clients to produce blocks on these nodes and attest on other beacon nodes. An attack attempting to prevent liveness on the eth2 network would then need to preemptively find and attack the proposer nodes which is significantly more difficult than the default setup. 
This is a follow on from: #3328 Co-authored-by: Michael Sproul Co-authored-by: Paul Hauner --- beacon_node/lighthouse_network/src/config.rs | 4 + .../src/subnet_service/attestation_subnets.rs | 13 ++ .../src/subnet_service/sync_subnets.rs | 9 + beacon_node/src/cli.rs | 10 +- beacon_node/src/config.rs | 17 ++ book/src/SUMMARY.md | 1 + book/src/advanced-proposer-only.md | 71 ++++++++ testing/simulator/src/cli.rs | 12 ++ testing/simulator/src/eth1_sim.rs | 21 ++- testing/simulator/src/local_network.rs | 51 +++++- testing/simulator/src/no_eth1_sim.rs | 6 +- testing/simulator/src/sync_sim.rs | 16 +- validator_client/src/block_service.rs | 102 ++++++++++- validator_client/src/cli.rs | 11 +- validator_client/src/config.rs | 11 ++ validator_client/src/lib.rs | 172 ++++++++++++------ 16 files changed, 452 insertions(+), 75 deletions(-) create mode 100644 book/src/advanced-proposer-only.md diff --git a/beacon_node/lighthouse_network/src/config.rs b/beacon_node/lighthouse_network/src/config.rs index d8efa2020..f4b3b78d0 100644 --- a/beacon_node/lighthouse_network/src/config.rs +++ b/beacon_node/lighthouse_network/src/config.rs @@ -134,6 +134,9 @@ pub struct Config { /// List of extra topics to initially subscribe to as strings. pub topics: Vec, + /// Whether we are running a block proposer only node. + pub proposer_only: bool, + /// Whether metrics are enabled. 
pub metrics_enabled: bool, @@ -322,6 +325,7 @@ impl Default for Config { import_all_attestations: false, shutdown_after_sync: false, topics: Vec::new(), + proposer_only: false, metrics_enabled: false, enable_light_client_server: false, outbound_rate_limiter_config: None, diff --git a/beacon_node/network/src/subnet_service/attestation_subnets.rs b/beacon_node/network/src/subnet_service/attestation_subnets.rs index 70ba1c817..e46a52cfb 100644 --- a/beacon_node/network/src/subnet_service/attestation_subnets.rs +++ b/beacon_node/network/src/subnet_service/attestation_subnets.rs @@ -112,6 +112,9 @@ pub struct AttestationService { #[cfg(feature = "deterministic_long_lived_attnets")] next_long_lived_subscription_event: Pin>, + /// Whether this node is a block proposer-only node. + proposer_only: bool, + /// The logger for the attestation service. log: slog::Logger, } @@ -155,6 +158,7 @@ impl AttestationService { known_validators: HashSetDelay::new(last_seen_val_timeout), waker: None, discovery_disabled: config.disable_discovery, + proposer_only: config.proposer_only, subscribe_all_subnets: config.subscribe_all_subnets, long_lived_subnet_subscription_slots, log, @@ -256,6 +260,11 @@ impl AttestationService { &mut self, subscriptions: Vec, ) -> Result<(), String> { + // If the node is in a proposer-only state, we ignore all subnet subscriptions. 
+ if self.proposer_only { + return Ok(()); + } + // Maps each subnet_id subscription to it's highest slot let mut subnets_to_discover: HashMap = HashMap::new(); for subscription in subscriptions { @@ -450,6 +459,10 @@ impl AttestationService { subnet: SubnetId, attestation: &Attestation, ) -> bool { + // Proposer-only mode does not need to process attestations + if self.proposer_only { + return false; + } self.aggregate_validators_on_subnet .as_ref() .map(|tracked_vals| { diff --git a/beacon_node/network/src/subnet_service/sync_subnets.rs b/beacon_node/network/src/subnet_service/sync_subnets.rs index 0b27ff527..eda7ce8ef 100644 --- a/beacon_node/network/src/subnet_service/sync_subnets.rs +++ b/beacon_node/network/src/subnet_service/sync_subnets.rs @@ -54,6 +54,9 @@ pub struct SyncCommitteeService { /// We are always subscribed to all subnets. subscribe_all_subnets: bool, + /// Whether this node is a block proposer-only node. + proposer_only: bool, + /// The logger for the attestation service. 
log: slog::Logger, } @@ -82,6 +85,7 @@ impl SyncCommitteeService { waker: None, subscribe_all_subnets: config.subscribe_all_subnets, discovery_disabled: config.disable_discovery, + proposer_only: config.proposer_only, log, } } @@ -110,6 +114,11 @@ impl SyncCommitteeService { &mut self, subscriptions: Vec, ) -> Result<(), String> { + // A proposer-only node does not subscribe to any sync-committees + if self.proposer_only { + return Ok(()); + } + let mut subnets_to_discover = Vec::new(); for subscription in subscriptions { metrics::inc_counter(&metrics::SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS); diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 25521ec24..a578ac7ea 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -123,7 +123,6 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { Arg::with_name("target-peers") .long("target-peers") .help("The target number of peers.") - .default_value("80") .takes_value(true), ) .arg( @@ -269,6 +268,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .min_values(0) .hidden(true) ) + .arg( + Arg::with_name("proposer-only") + .long("proposer-only") + .help("Sets this beacon node at be a block proposer only node. \ + This will run the beacon node in a minimal configuration that is sufficient for block publishing only. This flag should be used \ + for a beacon node being referenced by validator client using the --proposer-node flag. This configuration is for enabling more secure setups.") + .takes_value(false), + ) + .arg( Arg::with_name("disable-backfill-rate-limiting") .long("disable-backfill-rate-limiting") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 8cc38a534..7cd2a6279 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -979,10 +979,13 @@ pub fn set_network_config( config.set_listening_addr(parse_listening_addresses(cli_args, log)?); + // A custom target-peers command will overwrite the --proposer-only default. 
if let Some(target_peers_str) = cli_args.value_of("target-peers") { config.target_peers = target_peers_str .parse::() .map_err(|_| format!("Invalid number of target peers: {}", target_peers_str))?; + } else { + config.target_peers = 80; // default value } if let Some(value) = cli_args.value_of("network-load") { @@ -1218,6 +1221,20 @@ pub fn set_network_config( config.outbound_rate_limiter_config = Some(Default::default()); } + // Proposer-only mode overrides a number of previous configuration parameters. + // Specifically, we avoid subscribing to long-lived subnets and wish to maintain a minimal set + // of peers. + if cli_args.is_present("proposer-only") { + config.subscribe_all_subnets = false; + + if cli_args.value_of("target-peers").is_none() { + // If a custom value is not set, change the default to 15 + config.target_peers = 15; + } + config.proposer_only = true; + warn!(log, "Proposer-only mode enabled"; "info"=> "Do not connect a validator client to this node unless via the --proposer-nodes flag"); + } + Ok(()) } diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index ff5c1e980..83429dc3d 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -41,6 +41,7 @@ * [Checkpoint Sync](./checkpoint-sync.md) * [Custom Data Directories](./advanced-datadir.md) * [Validator Graffiti](./graffiti.md) + * [Proposer Only Beacon Nodes](./advanced-proposer-only.md) * [Remote Signing with Web3Signer](./validator-web3signer.md) * [Database Configuration](./advanced_database.md) * [Database Migrations](./database-migrations.md) diff --git a/book/src/advanced-proposer-only.md b/book/src/advanced-proposer-only.md new file mode 100644 index 000000000..c3347e044 --- /dev/null +++ b/book/src/advanced-proposer-only.md @@ -0,0 +1,71 @@ +# Advanced Proposer-Only Beacon Nodes + +Lighthouse allows for more exotic setups that can minimize attack vectors by +adding redundant beacon nodes and dividing the roles of attesting and block +production between them. 
+ +The purpose of this is to minimize attack vectors +where malicious users obtain the network identities (IP addresses) of beacon +nodes corresponding to individual validators and subsequently perform Denial of Service +attacks on the beacon nodes when they are due to produce a block on the +network. By splitting the duties of attestation and block production across +different beacon nodes, an attacker may not know which node is the block +production node, especially if the user rotates IP addresses of the block +production beacon node in between block proposals (proposals are infrequent on +networks with large validator counts). + +## The Beacon Node + +A Lighthouse beacon node can be configured with the `--proposer-only` flag +(i.e. `lighthouse bn --proposer-only`). +Setting a beacon node with this flag will limit its use as a beacon node for +normal activities such as performing attestations, but it will make the node +harder to identify as a potential node to attack and will also consume fewer +resources. + +Specifically, this flag reduces the default peer count (to a safe minimal +number, as maintaining peers on attestation subnets does not need to be considered) and +prevents the node from subscribing to any attestation subnets or +sync committees, which is a primary way for attackers to de-anonymize +validators. + +> Note: Beacon nodes that have set the `--proposer-only` flag should not be connected +> to validator clients unless via the `--proposer-nodes` flag. If connected as a +> normal beacon node, the validator may fail to handle its duties correctly, +> resulting in a loss of income. + + +## The Validator Client + +The validator client can be given a list of HTTP API endpoints representing +beacon nodes that will be solely used for block propagation on the network, via +the CLI flag `--proposer-nodes`. 
These nodes can be any working beacon nodes +and do not specifically have to be proposer-only beacon nodes that have been +executed with the `--proposer-only` flag (although we do recommend this flag for +these nodes for added security). + +> Note: The validator client still requires at least one other beacon node to +> perform its duties, which must be specified in the usual `--beacon-nodes` flag. + +> Note: The validator client will attempt to get a block to propose from the +> beacon nodes specified in `--beacon-nodes` before trying `--proposer-nodes`. +> This is because the nodes subscribed to subnets have a higher chance of +> producing a more profitable block. Any block builders should therefore be +> attached to the `--beacon-nodes` and not necessarily the `--proposer-nodes`. + + +## Setup Overview + +The intended set-up to take advantage of this mechanism is to run one (or more) +normal beacon nodes in conjunction with one (or more) proposer-only beacon +nodes. See the [Redundancy](./redundancy.md) section for more information about +setting up redundant beacon nodes. The proposer-only beacon nodes should be +set up to use a different IP address than the primary (non-proposer-only) nodes. +For added security, the IP addresses of the proposer-only nodes should be +rotated occasionally such that a new IP address is used per block proposal. + +A single validator client can then connect to all of the above nodes via the +`--beacon-nodes` and `--proposer-nodes` flags. The resulting setup will allow +the validator client to perform its regular duties on the standard beacon nodes +and when the time comes to propose a block, it will send this block via the +specified proposer-only nodes. 
diff --git a/testing/simulator/src/cli.rs b/testing/simulator/src/cli.rs index f1196502f..9668ee8cb 100644 --- a/testing/simulator/src/cli.rs +++ b/testing/simulator/src/cli.rs @@ -24,6 +24,12 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .takes_value(true) .default_value("4") .help("Number of beacon nodes")) + .arg(Arg::with_name("proposer-nodes") + .short("n") + .long("nodes") + .takes_value(true) + .default_value("2") + .help("Number of proposer-only beacon nodes")) .arg(Arg::with_name("validators_per_node") .short("v") .long("validators_per_node") @@ -57,6 +63,12 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .takes_value(true) .default_value("4") .help("Number of beacon nodes")) + .arg(Arg::with_name("proposer-nodes") + .short("n") + .long("nodes") + .takes_value(true) + .default_value("2") + .help("Number of proposer-only beacon nodes")) .arg(Arg::with_name("validators_per_node") .short("v") .long("validators_per_node") diff --git a/testing/simulator/src/eth1_sim.rs b/testing/simulator/src/eth1_sim.rs index 43e8a5cf4..1699c0e9e 100644 --- a/testing/simulator/src/eth1_sim.rs +++ b/testing/simulator/src/eth1_sim.rs @@ -27,6 +27,8 @@ const SUGGESTED_FEE_RECIPIENT: [u8; 20] = pub fn run_eth1_sim(matches: &ArgMatches) -> Result<(), String> { let node_count = value_t!(matches, "nodes", usize).expect("missing nodes default"); + let proposer_nodes = value_t!(matches, "proposer-nodes", usize).unwrap_or(0); + println!("PROPOSER-NODES: {}", proposer_nodes); let validators_per_node = value_t!(matches, "validators_per_node", usize) .expect("missing validators_per_node default"); let speed_up_factor = @@ -35,7 +37,8 @@ pub fn run_eth1_sim(matches: &ArgMatches) -> Result<(), String> { let post_merge_sim = matches.is_present("post-merge"); println!("Beacon Chain Simulator:"); - println!(" nodes:{}", node_count); + println!(" nodes:{}, proposer_nodes: {}", node_count, proposer_nodes); + println!(" validators_per_node:{}", validators_per_node); println!(" post merge 
simulation:{}", post_merge_sim); println!(" continue_after_checks:{}", continue_after_checks); @@ -147,7 +150,7 @@ pub fn run_eth1_sim(matches: &ArgMatches) -> Result<(), String> { beacon_config.sync_eth1_chain = true; beacon_config.eth1.auto_update_interval_millis = eth1_block_time.as_millis() as u64; beacon_config.eth1.chain_id = Eth1Id::from(chain_id); - beacon_config.network.target_peers = node_count - 1; + beacon_config.network.target_peers = node_count + proposer_nodes - 1; beacon_config.network.enr_address = (Some(Ipv4Addr::LOCALHOST), None); @@ -173,7 +176,17 @@ pub fn run_eth1_sim(matches: &ArgMatches) -> Result<(), String> { * One by one, add beacon nodes to the network. */ for _ in 0..node_count - 1 { - network.add_beacon_node(beacon_config.clone()).await?; + network + .add_beacon_node(beacon_config.clone(), false) + .await?; + } + + /* + * One by one, add proposer nodes to the network. + */ + for _ in 0..proposer_nodes - 1 { + println!("Adding a proposer node"); + network.add_beacon_node(beacon_config.clone(), true).await?; } /* @@ -310,7 +323,7 @@ pub fn run_eth1_sim(matches: &ArgMatches) -> Result<(), String> { */ println!( "Simulation complete. 
Finished with {} beacon nodes and {} validator clients", - network.beacon_node_count(), + network.beacon_node_count() + network.proposer_node_count(), network.validator_client_count() ); diff --git a/testing/simulator/src/local_network.rs b/testing/simulator/src/local_network.rs index 3e481df88..e35870d12 100644 --- a/testing/simulator/src/local_network.rs +++ b/testing/simulator/src/local_network.rs @@ -25,6 +25,7 @@ pub const TERMINAL_BLOCK: u64 = 64; pub struct Inner { pub context: RuntimeContext, pub beacon_nodes: RwLock>>, + pub proposer_nodes: RwLock>>, pub validator_clients: RwLock>>, pub execution_nodes: RwLock>>, } @@ -97,6 +98,7 @@ impl LocalNetwork { inner: Arc::new(Inner { context, beacon_nodes: RwLock::new(vec![beacon_node]), + proposer_nodes: RwLock::new(vec![]), execution_nodes: RwLock::new(execution_node), validator_clients: RwLock::new(vec![]), }), @@ -111,6 +113,14 @@ impl LocalNetwork { self.beacon_nodes.read().len() } + /// Returns the number of proposer nodes in the network. + /// + /// Note: does not count nodes that are external to this `LocalNetwork` that may have connected + /// (e.g., another Lighthouse process on the same machine.) + pub fn proposer_node_count(&self) -> usize { + self.proposer_nodes.read().len() + } + /// Returns the number of validator clients in the network. /// /// Note: does not count nodes that are external to this `LocalNetwork` that may have connected @@ -120,7 +130,11 @@ impl LocalNetwork { } /// Adds a beacon node to the network, connecting to the 0'th beacon node via ENR. 
- pub async fn add_beacon_node(&self, mut beacon_config: ClientConfig) -> Result<(), String> { + pub async fn add_beacon_node( + &self, + mut beacon_config: ClientConfig, + is_proposer: bool, + ) -> Result<(), String> { let self_1 = self.clone(); let count = self.beacon_node_count() as u16; println!("Adding beacon node.."); @@ -135,6 +149,7 @@ impl LocalNetwork { .enr() .expect("bootnode must have a network"), ); + let count = (self.beacon_node_count() + self.proposer_node_count()) as u16; beacon_config.network.set_ipv4_listening_address( std::net::Ipv4Addr::UNSPECIFIED, BOOTNODE_PORT + count, @@ -143,6 +158,7 @@ impl LocalNetwork { beacon_config.network.enr_udp4_port = Some(BOOTNODE_PORT + count); beacon_config.network.enr_tcp4_port = Some(BOOTNODE_PORT + count); beacon_config.network.discv5_config.table_filter = |_| true; + beacon_config.network.proposer_only = is_proposer; } if let Some(el_config) = &mut beacon_config.execution_layer { let config = MockExecutionConfig { @@ -173,7 +189,11 @@ impl LocalNetwork { beacon_config, ) .await?; - self_1.beacon_nodes.write().push(beacon_node); + if is_proposer { + self_1.proposer_nodes.write().push(beacon_node); + } else { + self_1.beacon_nodes.write().push(beacon_node); + } Ok(()) } @@ -200,6 +220,16 @@ impl LocalNetwork { .http_api_listen_addr() .expect("Must have http started") }; + // If there is a proposer node for the same index, we will use that for proposing + let proposer_socket_addr = { + let read_lock = self.proposer_nodes.read(); + read_lock.get(beacon_node).map(|proposer_node| { + proposer_node + .client + .http_api_listen_addr() + .expect("Must have http started") + }) + }; let beacon_node = SensitiveUrl::parse( format!("http://{}:{}", socket_addr.ip(), socket_addr.port()).as_str(), @@ -210,6 +240,21 @@ impl LocalNetwork { } else { vec![beacon_node] }; + + // If we have a proposer node established, use it. 
+ if let Some(proposer_socket_addr) = proposer_socket_addr { + let url = SensitiveUrl::parse( + format!( + "http://{}:{}", + proposer_socket_addr.ip(), + proposer_socket_addr.port() + ) + .as_str(), + ) + .unwrap(); + validator_config.proposer_nodes = vec![url]; + } + let validator_client = LocalValidatorClient::production_with_insecure_keypairs( context, validator_config, @@ -223,9 +268,11 @@ impl LocalNetwork { /// For all beacon nodes in `Self`, return a HTTP client to access each nodes HTTP API. pub fn remote_nodes(&self) -> Result, String> { let beacon_nodes = self.beacon_nodes.read(); + let proposer_nodes = self.proposer_nodes.read(); beacon_nodes .iter() + .chain(proposer_nodes.iter()) .map(|beacon_node| beacon_node.remote_node()) .collect() } diff --git a/testing/simulator/src/no_eth1_sim.rs b/testing/simulator/src/no_eth1_sim.rs index f1f6dc442..b7598f9fa 100644 --- a/testing/simulator/src/no_eth1_sim.rs +++ b/testing/simulator/src/no_eth1_sim.rs @@ -100,7 +100,9 @@ pub fn run_no_eth1_sim(matches: &ArgMatches) -> Result<(), String> { */ for _ in 0..node_count - 1 { - network.add_beacon_node(beacon_config.clone()).await?; + network + .add_beacon_node(beacon_config.clone(), false) + .await?; } /* @@ -151,7 +153,7 @@ pub fn run_no_eth1_sim(matches: &ArgMatches) -> Result<(), String> { */ println!( "Simulation complete. 
Finished with {} beacon nodes and {} validator clients", - network.beacon_node_count(), + network.beacon_node_count() + network.proposer_node_count(), network.validator_client_count() ); diff --git a/testing/simulator/src/sync_sim.rs b/testing/simulator/src/sync_sim.rs index c437457c2..5eaed809d 100644 --- a/testing/simulator/src/sync_sim.rs +++ b/testing/simulator/src/sync_sim.rs @@ -228,7 +228,7 @@ pub async fn verify_one_node_sync( ) .await; // Add a beacon node - network.add_beacon_node(beacon_config).await?; + network.add_beacon_node(beacon_config, false).await?; // Check every `epoch_duration` if nodes are synced // limited to at most `sync_timeout` epochs let mut interval = tokio::time::interval(epoch_duration); @@ -265,8 +265,10 @@ pub async fn verify_two_nodes_sync( ) .await; // Add beacon nodes - network.add_beacon_node(beacon_config.clone()).await?; - network.add_beacon_node(beacon_config).await?; + network + .add_beacon_node(beacon_config.clone(), false) + .await?; + network.add_beacon_node(beacon_config, false).await?; // Check every `epoch_duration` if nodes are synced // limited to at most `sync_timeout` epochs let mut interval = tokio::time::interval(epoch_duration); @@ -305,8 +307,10 @@ pub async fn verify_in_between_sync( ) .await; // Add two beacon nodes - network.add_beacon_node(beacon_config.clone()).await?; - network.add_beacon_node(beacon_config).await?; + network + .add_beacon_node(beacon_config.clone(), false) + .await?; + network.add_beacon_node(beacon_config, false).await?; // Delay before adding additional syncing nodes. 
epoch_delay( Epoch::new(sync_timeout - 5), @@ -315,7 +319,7 @@ pub async fn verify_in_between_sync( ) .await; // Add a beacon node - network.add_beacon_node(config1.clone()).await?; + network.add_beacon_node(config1.clone(), false).await?; // Check every `epoch_duration` if nodes are synced // limited to at most `sync_timeout` epochs let mut interval = tokio::time::interval(epoch_duration); diff --git a/validator_client/src/block_service.rs b/validator_client/src/block_service.rs index 3b3749237..61a5a094c 100644 --- a/validator_client/src/block_service.rs +++ b/validator_client/src/block_service.rs @@ -7,8 +7,11 @@ use crate::{ }; use crate::{http_metrics::metrics, validator_store::ValidatorStore}; use environment::RuntimeContext; +use eth2::BeaconNodeHttpClient; use slog::{crit, debug, error, info, trace, warn}; use slot_clock::SlotClock; +use std::fmt::Debug; +use std::future::Future; use std::ops::Deref; use std::sync::Arc; use std::time::Duration; @@ -45,6 +48,7 @@ pub struct BlockServiceBuilder { validator_store: Option>>, slot_clock: Option>, beacon_nodes: Option>>, + proposer_nodes: Option>>, context: Option>, graffiti: Option, graffiti_file: Option, @@ -57,6 +61,7 @@ impl BlockServiceBuilder { validator_store: None, slot_clock: None, beacon_nodes: None, + proposer_nodes: None, context: None, graffiti: None, graffiti_file: None, @@ -79,6 +84,11 @@ impl BlockServiceBuilder { self } + pub fn proposer_nodes(mut self, proposer_nodes: Arc>) -> Self { + self.proposer_nodes = Some(proposer_nodes); + self + } + pub fn runtime_context(mut self, context: RuntimeContext) -> Self { self.context = Some(context); self @@ -114,6 +124,7 @@ impl BlockServiceBuilder { context: self .context .ok_or("Cannot build BlockService without runtime_context")?, + proposer_nodes: self.proposer_nodes, graffiti: self.graffiti, graffiti_file: self.graffiti_file, block_delay: self.block_delay, @@ -122,11 +133,81 @@ impl BlockServiceBuilder { } } +// Combines a set of non-block-proposing 
`beacon_nodes` and only-block-proposing +// `proposer_nodes`. +pub struct ProposerFallback { + beacon_nodes: Arc>, + proposer_nodes: Option>>, +} + +impl ProposerFallback { + // Try `func` on `self.proposer_nodes` first. If that doesn't work, try `self.beacon_nodes`. + pub async fn first_success_try_proposers_first<'a, F, O, Err, R>( + &'a self, + require_synced: RequireSynced, + offline_on_failure: OfflineOnFailure, + func: F, + ) -> Result> + where + F: Fn(&'a BeaconNodeHttpClient) -> R + Clone, + R: Future>, + Err: Debug, + { + // If there are proposer nodes, try calling `func` on them and return early if they are successful. + if let Some(proposer_nodes) = &self.proposer_nodes { + if let Ok(result) = proposer_nodes + .first_success(require_synced, offline_on_failure, func.clone()) + .await + { + return Ok(result); + } + } + + // If the proposer nodes failed, try on the non-proposer nodes. + self.beacon_nodes + .first_success(require_synced, offline_on_failure, func) + .await + } + + // Try `func` on `self.beacon_nodes` first. If that doesn't work, try `self.proposer_nodes`. + pub async fn first_success_try_proposers_last<'a, F, O, Err, R>( + &'a self, + require_synced: RequireSynced, + offline_on_failure: OfflineOnFailure, + func: F, + ) -> Result> + where + F: Fn(&'a BeaconNodeHttpClient) -> R + Clone, + R: Future>, + Err: Debug, + { + // Try running `func` on the non-proposer beacon nodes. + let beacon_nodes_result = self + .beacon_nodes + .first_success(require_synced, offline_on_failure, func.clone()) + .await; + + match (beacon_nodes_result, &self.proposer_nodes) { + // The non-proposer node call succeed, return the result. + (Ok(success), _) => Ok(success), + // The non-proposer node call failed, but we don't have any proposer nodes. Return an error. + (Err(e), None) => Err(e), + // The non-proposer node call failed, try the same call on the proposer nodes. 
+ (Err(_), Some(proposer_nodes)) => { + proposer_nodes + .first_success(require_synced, offline_on_failure, func) + .await + } + } + } +} + /// Helper to minimise `Arc` usage. pub struct Inner { validator_store: Arc>, slot_clock: Arc, beacon_nodes: Arc>, + proposer_nodes: Option>>, context: RuntimeContext, graffiti: Option, graffiti_file: Option, @@ -334,16 +415,23 @@ impl BlockService { let self_ref = &self; let proposer_index = self.validator_store.validator_index(&validator_pubkey); let validator_pubkey_ref = &validator_pubkey; + let proposer_fallback = ProposerFallback { + beacon_nodes: self.beacon_nodes.clone(), + proposer_nodes: self.proposer_nodes.clone(), + }; info!( log, "Requesting unsigned block"; "slot" => slot.as_u64(), ); + // Request block from first responsive beacon node. - let block = self - .beacon_nodes - .first_success( + // + // Try the proposer nodes last, since it's likely that they don't have a + // great view of attestations on the network. + let block = proposer_fallback + .first_success_try_proposers_last( RequireSynced::No, OfflineOnFailure::Yes, |beacon_node| async move { @@ -424,8 +512,12 @@ impl BlockService { ); // Publish block with first available beacon node. - self.beacon_nodes - .first_success( + // + // Try the proposer nodes first, since we've likely gone to efforts to + // protect them from DoS attacks and they're most likely to successfully + // publish a block. + proposer_fallback + .first_success_try_proposers_first( RequireSynced::No, OfflineOnFailure::Yes, |beacon_node| async { diff --git a/validator_client/src/cli.rs b/validator_client/src/cli.rs index fd96aa1f5..41ef85dfc 100644 --- a/validator_client/src/cli.rs +++ b/validator_client/src/cli.rs @@ -26,6 +26,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { ) .takes_value(true), ) + .arg( + Arg::with_name("proposer-nodes") + .long("proposer-nodes") + .value_name("NETWORK_ADDRESSES") + .help("Comma-separated addresses to one or more beacon node HTTP APIs. 
\ + These specify nodes that are used to send beacon block proposals. A failure will revert back to the standard beacon nodes specified in --beacon-nodes." + ) + .takes_value(true), + ) .arg( Arg::with_name("disable-run-on-all") .long("disable-run-on-all") @@ -118,7 +127,7 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .value_name("CERTIFICATE-FILES") .takes_value(true) .help("Comma-separated paths to custom TLS certificates to use when connecting \ - to a beacon node. These certificates must be in PEM format and are used \ + to a beacon node (and/or proposer node). These certificates must be in PEM format and are used \ in addition to the OS trust store. Commas must only be used as a \ delimiter, and must not be part of the certificate path.") ) diff --git a/validator_client/src/config.rs b/validator_client/src/config.rs index 724d6c74f..b6e808a86 100644 --- a/validator_client/src/config.rs +++ b/validator_client/src/config.rs @@ -29,6 +29,8 @@ pub struct Config { /// /// Should be similar to `["http://localhost:8080"]` pub beacon_nodes: Vec, + /// An optional beacon node used for block proposals only. + pub proposer_nodes: Vec, /// If true, the validator client will still poll for duties and produce blocks even if the /// beacon node is not synced at startup. pub allow_unsynced_beacon_node: bool, @@ -95,6 +97,7 @@ impl Default for Config { validator_dir, secrets_dir, beacon_nodes, + proposer_nodes: Vec::new(), allow_unsynced_beacon_node: false, disable_auto_discover: false, init_slashing_protection: false, @@ -186,6 +189,14 @@ impl Config { .map_err(|e| format!("Unable to parse beacon node URL: {:?}", e))?]; } + if let Some(proposer_nodes) = parse_optional::(cli_args, "proposer_nodes")? 
{ + config.proposer_nodes = proposer_nodes + .split(',') + .map(SensitiveUrl::parse) + .collect::>() + .map_err(|e| format!("Unable to parse proposer node URL: {:?}", e))?; + } + if cli_args.is_present("delete-lockfiles") { warn!( log, diff --git a/validator_client/src/lib.rs b/validator_client/src/lib.rs index 556fdef26..cfe355f54 100644 --- a/validator_client/src/lib.rs +++ b/validator_client/src/lib.rs @@ -24,6 +24,7 @@ pub use config::Config; use initialized_validators::InitializedValidators; use lighthouse_metrics::set_gauge; use monitoring_api::{MonitoringHttpClient, ProcessType}; +use sensitive_url::SensitiveUrl; pub use slashing_protection::{SlashingDatabase, SLASHING_PROTECTION_FILENAME}; use crate::beacon_node_fallback::{ @@ -263,60 +264,70 @@ impl ProductionValidatorClient { .checked_sub(1) .ok_or_else(|| "No beacon nodes defined.".to_string())?; + let beacon_node_setup = |x: (usize, &SensitiveUrl)| { + let i = x.0; + let url = x.1; + let slot_duration = Duration::from_secs(context.eth2_config.spec.seconds_per_slot); + + let mut beacon_node_http_client_builder = ClientBuilder::new(); + + // Add new custom root certificates if specified. + if let Some(certificates) = &config.beacon_nodes_tls_certs { + for cert in certificates { + beacon_node_http_client_builder = beacon_node_http_client_builder + .add_root_certificate(load_pem_certificate(cert)?); + } + } + + let beacon_node_http_client = beacon_node_http_client_builder + // Set default timeout to be the full slot duration. + .timeout(slot_duration) + .build() + .map_err(|e| format!("Unable to build HTTP client: {:?}", e))?; + + // Use quicker timeouts if a fallback beacon node exists. 
+ let timeouts = if i < last_beacon_node_index && !config.use_long_timeouts { + info!( + log, + "Fallback endpoints are available, using optimized timeouts."; + ); + Timeouts { + attestation: slot_duration / HTTP_ATTESTATION_TIMEOUT_QUOTIENT, + attester_duties: slot_duration / HTTP_ATTESTER_DUTIES_TIMEOUT_QUOTIENT, + liveness: slot_duration / HTTP_LIVENESS_TIMEOUT_QUOTIENT, + proposal: slot_duration / HTTP_PROPOSAL_TIMEOUT_QUOTIENT, + proposer_duties: slot_duration / HTTP_PROPOSER_DUTIES_TIMEOUT_QUOTIENT, + sync_committee_contribution: slot_duration + / HTTP_SYNC_COMMITTEE_CONTRIBUTION_TIMEOUT_QUOTIENT, + sync_duties: slot_duration / HTTP_SYNC_DUTIES_TIMEOUT_QUOTIENT, + get_beacon_blocks_ssz: slot_duration + / HTTP_GET_BEACON_BLOCK_SSZ_TIMEOUT_QUOTIENT, + get_debug_beacon_states: slot_duration / HTTP_GET_DEBUG_BEACON_STATE_QUOTIENT, + get_deposit_snapshot: slot_duration / HTTP_GET_DEPOSIT_SNAPSHOT_QUOTIENT, + } + } else { + Timeouts::set_all(slot_duration) + }; + + Ok(BeaconNodeHttpClient::from_components( + url.clone(), + beacon_node_http_client, + timeouts, + )) + }; + let beacon_nodes: Vec = config .beacon_nodes .iter() .enumerate() - .map(|(i, url)| { - let slot_duration = Duration::from_secs(context.eth2_config.spec.seconds_per_slot); + .map(beacon_node_setup) + .collect::, String>>()?; - let mut beacon_node_http_client_builder = ClientBuilder::new(); - - // Add new custom root certificates if specified. - if let Some(certificates) = &config.beacon_nodes_tls_certs { - for cert in certificates { - beacon_node_http_client_builder = beacon_node_http_client_builder - .add_root_certificate(load_pem_certificate(cert)?); - } - } - - let beacon_node_http_client = beacon_node_http_client_builder - // Set default timeout to be the full slot duration. - .timeout(slot_duration) - .build() - .map_err(|e| format!("Unable to build HTTP client: {:?}", e))?; - - // Use quicker timeouts if a fallback beacon node exists. 
- let timeouts = if i < last_beacon_node_index && !config.use_long_timeouts { - info!( - log, - "Fallback endpoints are available, using optimized timeouts."; - ); - Timeouts { - attestation: slot_duration / HTTP_ATTESTATION_TIMEOUT_QUOTIENT, - attester_duties: slot_duration / HTTP_ATTESTER_DUTIES_TIMEOUT_QUOTIENT, - liveness: slot_duration / HTTP_LIVENESS_TIMEOUT_QUOTIENT, - proposal: slot_duration / HTTP_PROPOSAL_TIMEOUT_QUOTIENT, - proposer_duties: slot_duration / HTTP_PROPOSER_DUTIES_TIMEOUT_QUOTIENT, - sync_committee_contribution: slot_duration - / HTTP_SYNC_COMMITTEE_CONTRIBUTION_TIMEOUT_QUOTIENT, - sync_duties: slot_duration / HTTP_SYNC_DUTIES_TIMEOUT_QUOTIENT, - get_beacon_blocks_ssz: slot_duration - / HTTP_GET_BEACON_BLOCK_SSZ_TIMEOUT_QUOTIENT, - get_debug_beacon_states: slot_duration - / HTTP_GET_DEBUG_BEACON_STATE_QUOTIENT, - get_deposit_snapshot: slot_duration / HTTP_GET_DEPOSIT_SNAPSHOT_QUOTIENT, - } - } else { - Timeouts::set_all(slot_duration) - }; - - Ok(BeaconNodeHttpClient::from_components( - url.clone(), - beacon_node_http_client, - timeouts, - )) - }) + let proposer_nodes: Vec = config + .proposer_nodes + .iter() + .enumerate() + .map(beacon_node_setup) .collect::, String>>()?; let num_nodes = beacon_nodes.len(); @@ -325,6 +336,12 @@ impl ProductionValidatorClient { .map(CandidateBeaconNode::new) .collect(); + let proposer_nodes_num = proposer_nodes.len(); + let proposer_candidates = proposer_nodes + .into_iter() + .map(CandidateBeaconNode::new) + .collect(); + // Set the count for beacon node fallbacks excluding the primary beacon node. set_gauge( &http_metrics::metrics::ETH2_FALLBACK_CONFIGURED, @@ -349,9 +366,16 @@ impl ProductionValidatorClient { log.clone(), ); + let mut proposer_nodes: BeaconNodeFallback<_, T> = BeaconNodeFallback::new( + proposer_candidates, + config.disable_run_on_all, + context.eth2_config.spec.clone(), + log.clone(), + ); + // Perform some potentially long-running initialization tasks. 
let (genesis_time, genesis_validators_root) = tokio::select! { - tuple = init_from_beacon_node(&beacon_nodes, &context) => tuple?, + tuple = init_from_beacon_node(&beacon_nodes, &proposer_nodes, &context) => tuple?, () = context.executor.exit() => return Err("Shutting down".to_string()) }; @@ -367,9 +391,14 @@ impl ProductionValidatorClient { ); beacon_nodes.set_slot_clock(slot_clock.clone()); + proposer_nodes.set_slot_clock(slot_clock.clone()); + let beacon_nodes = Arc::new(beacon_nodes); start_fallback_updater_service(context.clone(), beacon_nodes.clone())?; + let proposer_nodes = Arc::new(proposer_nodes); + start_fallback_updater_service(context.clone(), proposer_nodes.clone())?; + let doppelganger_service = if config.enable_doppelganger_protection { Some(Arc::new(DoppelgangerService::new( context @@ -433,15 +462,21 @@ impl ProductionValidatorClient { ctx.shared.write().duties_service = Some(duties_service.clone()); } - let block_service = BlockServiceBuilder::new() + let mut block_service_builder = BlockServiceBuilder::new() .slot_clock(slot_clock.clone()) .validator_store(validator_store.clone()) .beacon_nodes(beacon_nodes.clone()) .runtime_context(context.service_context("block".into())) .graffiti(config.graffiti) .graffiti_file(config.graffiti_file.clone()) - .block_delay(config.block_delay) - .build()?; + .block_delay(config.block_delay); + + // If we have proposer nodes, add them to the block service builder. 
+ if proposer_nodes_num > 0 { + block_service_builder = block_service_builder.proposer_nodes(proposer_nodes.clone()); + } + + let block_service = block_service_builder.build()?; let attestation_service = AttestationServiceBuilder::new() .duties_service(duties_service.clone()) @@ -581,13 +616,32 @@ impl ProductionValidatorClient { async fn init_from_beacon_node( beacon_nodes: &BeaconNodeFallback, + proposer_nodes: &BeaconNodeFallback, context: &RuntimeContext, ) -> Result<(u64, Hash256), String> { loop { beacon_nodes.update_unready_candidates().await; + proposer_nodes.update_unready_candidates().await; + let num_available = beacon_nodes.num_available().await; let num_total = beacon_nodes.num_total(); - if num_available > 0 { + + let proposer_available = proposer_nodes.num_available().await; + let proposer_total = proposer_nodes.num_total(); + + if proposer_total > 0 && proposer_available == 0 { + warn!( + context.log(), + "Unable to connect to a proposer node"; + "retry in" => format!("{} seconds", RETRY_DELAY.as_secs()), + "total_proposers" => proposer_total, + "available_proposers" => proposer_available, + "total_beacon_nodes" => num_total, + "available_beacon_nodes" => num_available, + ); + } + + if num_available > 0 && proposer_available == 0 { info!( context.log(), "Initialized beacon node connections"; @@ -595,6 +649,16 @@ async fn init_from_beacon_node( "available" => num_available, ); break; + } else if num_available > 0 { + info!( + context.log(), + "Initialized beacon node connections"; + "total" => num_total, + "available" => num_available, + "proposers_available" => proposer_available, + "proposers_total" => proposer_total, + ); + break; } else { warn!( context.log(),