From 6f4102aab6a11aaf21e2f9df041343f7318fca85 Mon Sep 17 00:00:00 2001 From: Age Manning Date: Fri, 14 Jan 2022 05:42:47 +0000 Subject: [PATCH] Network performance tuning (#2608) There is a pretty significant tradeoff between bandwidth and speed of gossipsub messages. We can reduce our bandwidth usage considerably at the cost of minimally delaying gossipsub messages. The impact of delaying messages has not been analyzed thoroughly yet; however, this PR in conjunction with some gossipsub updates shows considerable bandwidth reduction. This PR allows the user to set a CLI value (`network-load`) which is an integer in the range of 1 to 5 depending on their bandwidth appetite. 1 represents the least bandwidth but slowest message receiving and 5 represents the most bandwidth and fastest received message time. For low-bandwidth users it is likely to be more efficient to use a lower value. The default is set to 3, which currently represents a reduced bandwidth usage compared to the previous version of this PR. The previous lighthouse versions are equivalent to setting the `network-load` CLI to 4. This PR is awaiting a few gossipsub updates before we can get it into lighthouse. 
--- Cargo.lock | 27 +++--- .../lighthouse_network/src/behaviour/mod.rs | 7 +- beacon_node/lighthouse_network/src/config.rs | 87 +++++++++++++++++-- .../lighthouse_network/src/discovery/mod.rs | 7 +- .../src/peer_manager/mod.rs | 2 +- beacon_node/lighthouse_network/src/service.rs | 3 +- beacon_node/src/cli.rs | 9 ++ beacon_node/src/config.rs | 7 ++ 8 files changed, 120 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec56aab49..bc53b134f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,7 +111,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.4", "once_cell", "version_check", ] @@ -2200,9 +2200,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" dependencies = [ "cfg-if", "libc", @@ -2945,7 +2945,7 @@ dependencies = [ "bytes", "futures", "futures-timer", - "getrandom 0.2.3", + "getrandom 0.2.4", "instant", "lazy_static", "libp2p-core 0.31.0", @@ -4065,9 +4065,9 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" @@ -4740,7 +4740,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.4", ] [[package]] @@ -4810,7 +4810,7 @@ version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.4", "redox_syscall", ] @@ -4851,15 +4851,16 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4e0a76dc12a116108933f6301b95e83634e0c47b0afbed6abbaa0601e99258" +checksum = "87f242f1488a539a79bac6dbe7c8609ae43b7914b7736210f239a37cccb32525" dependencies = [ "base64 0.13.0", "bytes", "encoding_rs", "futures-core", "futures-util", + "h2", "http", "http-body", "hyper", @@ -5614,9 +5615,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "snap" @@ -6633,7 +6634,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.4", "serde", ] diff --git a/beacon_node/lighthouse_network/src/behaviour/mod.rs b/beacon_node/lighthouse_network/src/behaviour/mod.rs index 32a87166b..61ba855f6 100644 --- a/beacon_node/lighthouse_network/src/behaviour/mod.rs +++ b/beacon_node/lighthouse_network/src/behaviour/mod.rs @@ -2,7 +2,7 @@ use crate::behaviour::gossipsub_scoring_parameters::{ lighthouse_gossip_thresholds, PeerScoreSettings, }; use crate::config::gossipsub_config; -use crate::discovery::{subnet_predicate, Discovery, DiscoveryEvent, TARGET_SUBNET_PEERS}; +use crate::discovery::{subnet_predicate, Discovery, DiscoveryEvent}; use crate::peer_manager::{ config::Config as PeerManagerCfg, peerdb::score::PeerAction, peerdb::score::ReportSource, 
ConnectionDirection, PeerManager, PeerManagerEvent, @@ -52,6 +52,9 @@ use types::{ pub mod gossipsub_scoring_parameters; +/// The number of peers we target per subnet for discovery queries. +pub const TARGET_SUBNET_PEERS: usize = 2; + const MAX_IDENTIFY_ADDRESSES: usize = 10; /// Identifier of requests sent by a peer. @@ -227,7 +230,7 @@ impl Behaviour { max_subscriptions_per_request: 150, // 148 in theory = (64 attestation + 4 sync committee + 6 core topics) * 2 }; - config.gs_config = gossipsub_config(ctx.fork_context.clone()); + config.gs_config = gossipsub_config(config.network_load, ctx.fork_context.clone()); // If metrics are enabled for gossipsub build the configuration let gossipsub_metrics = ctx diff --git a/beacon_node/lighthouse_network/src/config.rs b/beacon_node/lighthouse_network/src/config.rs index 789242e8d..4cafcf62b 100644 --- a/beacon_node/lighthouse_network/src/config.rs +++ b/beacon_node/lighthouse_network/src/config.rs @@ -20,8 +20,6 @@ use types::{ForkContext, ForkName}; const GOSSIP_MAX_SIZE: usize = 1_048_576; // 1M /// The maximum transmit size of gossip messages in bytes post-merge. const GOSSIP_MAX_SIZE_POST_MERGE: usize = 10 * 1_048_576; // 10M -/// This is a constant to be used in discovery. The lower bound of the gossipsub mesh. -pub const MESH_N_LOW: usize = 6; /// The cache time is set to accommodate the circulation time of an attestation. /// @@ -116,6 +114,10 @@ pub struct Config { /// runtime. pub import_all_attestations: bool, + /// A setting specifying a range of values that tune the network parameters of lighthouse. The + /// lower the value the less bandwidth used, but the slower messages will be received. + pub network_load: u8, + /// Indicates if the user has set the network to be in private mode. Currently this /// prevents sending client identifying information over identify. 
pub private: bool, @@ -197,6 +199,7 @@ impl Default for Config { client_version: lighthouse_version::version_with_platform(), disable_discovery: false, upnp_enabled: true, + network_load: 3, private: false, subscribe_all_subnets: false, import_all_attestations: false, @@ -207,8 +210,72 @@ impl Default for Config { } } +/// Controls sizes of gossipsub meshes to tune a Lighthouse node's bandwidth/performance. +pub struct NetworkLoad { + pub name: &'static str, + pub mesh_n_low: usize, + pub outbound_min: usize, + pub mesh_n: usize, + pub mesh_n_high: usize, + pub gossip_lazy: usize, + pub history_gossip: usize, +} + +impl From for NetworkLoad { + fn from(load: u8) -> NetworkLoad { + match load { + 1 => NetworkLoad { + name: "Low", + mesh_n_low: 1, + outbound_min: 1, + mesh_n: 3, + mesh_n_high: 4, + gossip_lazy: 3, + history_gossip: 12, + }, + 2 => NetworkLoad { + name: "Low", + mesh_n_low: 2, + outbound_min: 2, + mesh_n: 4, + mesh_n_high: 8, + gossip_lazy: 3, + history_gossip: 12, + }, + 3 => NetworkLoad { + name: "Average", + mesh_n_low: 3, + outbound_min: 2, + mesh_n: 5, + mesh_n_high: 10, + gossip_lazy: 3, + history_gossip: 12, + }, + 4 => NetworkLoad { + name: "Average", + mesh_n_low: 4, + outbound_min: 3, + mesh_n: 8, + mesh_n_high: 12, + gossip_lazy: 3, + history_gossip: 12, + }, + // 5 and above + _ => NetworkLoad { + name: "High", + mesh_n_low: 5, + outbound_min: 3, + mesh_n: 10, + mesh_n_high: 15, + gossip_lazy: 5, + history_gossip: 12, + }, + } + } +} + /// Return a Lighthouse specific `GossipsubConfig` where the `message_id_fn` depends on the current fork. 
-pub fn gossipsub_config(fork_context: Arc) -> GossipsubConfig { +pub fn gossipsub_config(network_load: u8, fork_context: Arc) -> GossipsubConfig { // The function used to generate a gossipsub message id // We use the first 8 bytes of SHA256(data) for content addressing let fast_gossip_message_id = @@ -250,17 +317,21 @@ pub fn gossipsub_config(fork_context: Arc) -> GossipsubConfig { )[..20], ) }; + + let load = NetworkLoad::from(network_load); + GossipsubConfigBuilder::default() .max_transmit_size(gossip_max_size(is_merge_enabled)) .heartbeat_interval(Duration::from_millis(700)) - .mesh_n(8) - .mesh_n_low(MESH_N_LOW) - .mesh_n_high(12) - .gossip_lazy(6) + .mesh_n(load.mesh_n) + .mesh_n_low(load.mesh_n_low) + .mesh_outbound_min(load.outbound_min) + .mesh_n_high(load.mesh_n_high) + .gossip_lazy(load.gossip_lazy) .fanout_ttl(Duration::from_secs(60)) .history_length(12) .max_messages_per_rpc(Some(500)) // Responses to IWANT can be quite large - .history_gossip(3) + .history_gossip(load.history_gossip) .validate_messages() // require validation before propagation .validation_mode(ValidationMode::Anonymous) .duplicate_cache_time(DUPLICATE_CACHE_TIME) diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index 33e8c2c17..34c29a44d 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -7,7 +7,8 @@ pub(crate) mod enr; pub mod enr_ext; // Allow external use of the lighthouse ENR builder -use crate::{config, metrics}; +use crate::behaviour::TARGET_SUBNET_PEERS; +use crate::metrics; use crate::{error, Enr, NetworkConfig, NetworkGlobals, Subnet, SubnetDiscovery}; use discv5::{enr::NodeId, Discv5, Discv5Event}; pub use enr::{ @@ -47,8 +48,6 @@ pub use subnet_predicate::subnet_predicate; /// Local ENR storage filename. pub const ENR_FILENAME: &str = "enr.dat"; -/// Target number of peers we'd like to have connected to a given long-lived subnet. 
-pub const TARGET_SUBNET_PEERS: usize = config::MESH_N_LOW; /// Target number of peers to search for given a grouped subnet query. const TARGET_PEERS_FOR_GROUPED_QUERY: usize = 6; /// Number of times to attempt a discovery request. @@ -692,7 +691,7 @@ impl Discovery { return false; } - let target_peers = TARGET_SUBNET_PEERS - peers_on_subnet; + let target_peers = TARGET_SUBNET_PEERS.saturating_sub(peers_on_subnet); trace!(self.log, "Discovery query started for subnet"; "subnet_query" => ?subnet_query, "connected_peers_on_subnet" => peers_on_subnet, diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 318bdfcdf..6b8f6fff6 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -1,6 +1,6 @@ //! Implementation of Lighthouse's peer management system. -use crate::discovery::TARGET_SUBNET_PEERS; +use crate::behaviour::TARGET_SUBNET_PEERS; use crate::rpc::{GoodbyeReason, MetaData, Protocol, RPCError, RPCResponseErrorCode}; use crate::{error, metrics, Gossipsub}; use crate::{NetworkGlobals, PeerId}; diff --git a/beacon_node/lighthouse_network/src/service.rs b/beacon_node/lighthouse_network/src/service.rs index cbb11cae4..0ccdd28fd 100644 --- a/beacon_node/lighthouse_network/src/service.rs +++ b/beacon_node/lighthouse_network/src/service.rs @@ -1,6 +1,7 @@ use crate::behaviour::{ save_metadata_to_disk, Behaviour, BehaviourEvent, PeerRequestId, Request, Response, }; +use crate::config::NetworkLoad; use crate::discovery::enr; use crate::multiaddr::Protocol; use crate::rpc::{ @@ -107,7 +108,7 @@ impl Service { &log, )); - info!(log, "Libp2p Service"; "peer_id" => %enr.peer_id()); + info!(log, "Libp2p Starting"; "peer_id" => %enr.peer_id(), "bandwidth_config" => format!("{}-{}", config.network_load, NetworkLoad::from(config.network_load).name)); let discovery_string = if config.disable_discovery { "None".into() } else { diff 
--git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 57de6c1b9..4c2960c9d 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -104,6 +104,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .help("One or more comma-delimited base64-encoded ENR's to bootstrap the p2p network. Multiaddr is also supported.") .takes_value(true), ) + .arg( + Arg::with_name("network-load") + .long("network-load") + .value_name("INTEGER") + .help("Lighthouse's network can be tuned for bandwidth/performance. Setting this to a high value, will increase the bandwidth lighthouse uses, increasing the likelihood of redundant information in exchange for faster communication. This can increase profit of validators marginally by receiving messages faster on the network. Lower values decrease bandwidth usage, but makes communication slower which can lead to validator performance reduction. Values are in the range [1,5].") + .default_value("3") + .set(clap::ArgSettings::Hidden) + .takes_value(true), + ) .arg( Arg::with_name("disable-upnp") .long("disable-upnp") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index f65e6471f..df5cf1437 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -626,6 +626,13 @@ pub fn set_network_config( config.discovery_port = port; } + if let Some(value) = cli_args.value_of("network-load") { + let network_load = value + .parse::() + .map_err(|_| format!("Invalid integer: {}", value))?; + config.network_load = network_load; + } + if let Some(boot_enr_str) = cli_args.value_of("boot-nodes") { let mut enrs: Vec = vec![]; let mut multiaddrs: Vec = vec![];