From 661307dce198200b067dd9935ec95d256a154c87 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 30 Aug 2022 05:47:31 +0000 Subject: [PATCH] Separate committee subscriptions queue (#3508) ## Issue Addressed NA ## Proposed Changes As we've seen on Prater, there seems to be a correlation between these messages ``` WARN Not enough time for a discovery search subnet_id: ExactSubnet { subnet_id: SubnetId(19), slot: Slot(3742336) }, service: attestation_service ``` ... and nodes falling 20-30 slots behind the head for short periods. These nodes are running ~20k Prater validators. After running some metrics, I can see that the `network_recv` channel is processing ~250k `AttestationSubscribe` messages per minute. It occurred to me that perhaps the `AttestationSubscribe` messages are "washing out" the `SendRequest` and `SendResponse` messages. In this PR I separate the `AttestationSubscribe` and `SyncCommitteeSubscribe` messages into their own queue so the `tokio::select!` in the `NetworkService` can still process the other messages in the `network_recv` channel without necessarily having to clear all the subscription messages first. ~~I've also added filter to the HTTP API to prevent duplicate subscriptions going to the network service.~~ ## Additional Info - Currently being tested on Prater --- beacon_node/client/src/builder.rs | 24 ++--- beacon_node/http_api/src/lib.rs | 105 ++++++++++++++------- beacon_node/http_api/tests/common.rs | 14 +-- beacon_node/http_api/tests/tests.rs | 36 +++---- beacon_node/network/src/lib.rs | 4 +- beacon_node/network/src/metrics.rs | 14 +++ beacon_node/network/src/service.rs | 134 ++++++++++++++++++++------- common/eth2/src/types.rs | 2 +- 8 files changed, 231 insertions(+), 102 deletions(-) diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index d4c41244d..752ba3b7b 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -22,7 +22,7 @@ use execution_layer::ExecutionLayer; use genesis::{interop_genesis_state, Eth1GenesisService, DEFAULT_ETH1_BLOCK_HASH}; use lighthouse_network::{prometheus_client::registry::Registry, NetworkGlobals}; use monitoring_api::{MonitoringHttpClient, ProcessType}; -use network::{NetworkConfig, NetworkMessage, NetworkService}; +use network::{NetworkConfig, NetworkSenders, NetworkService}; use slasher::Slasher; use slasher_service::SlasherService; use slog::{debug, info, warn, Logger}; @@ -31,7 +31,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; use timer::spawn_timer; -use tokio::sync::{mpsc::UnboundedSender, oneshot}; +use tokio::sync::oneshot; use types::{ test_utils::generate_deterministic_keypairs, BeaconState, ChainSpec, EthSpec, ExecutionBlockHash, Hash256, SignedBeaconBlock, @@ -66,7 +66,7 @@ pub struct ClientBuilder { beacon_chain: Option>>, eth1_service: Option, network_globals: Option>>, - network_send: Option>>, + network_senders: Option>, gossipsub_registry: Option, db_path: Option, freezer_db_path: Option, @@ -98,7 +98,7 @@ where beacon_chain: None, eth1_service: None, network_globals: None, - network_send: None, + network_senders: None, gossipsub_registry: None, db_path: None, freezer_db_path: None, @@ -397,7 +397,7 @@ where > = Arc::new(http_api::Context { config: self.http_api_config.clone(), chain: None, - network_tx: None, + network_senders: None, network_globals: None, eth1_service: Some(genesis_service.eth1_service.clone()), log: context.log().clone(), @@ -481,7 +481,7 @@ where None }; - let (network_globals, network_send) = NetworkService::start( + let (network_globals, network_senders) = NetworkService::start( beacon_chain, config, context.executor, @@ -493,7 +493,7 @@ where .map_err(|e| format!("Failed to start network: {:?}", e))?; self.network_globals = Some(network_globals); - self.network_send = Some(network_send); + self.network_senders = Some(network_senders); self.gossipsub_registry = gossipsub_registry; Ok(self) @@ -537,16 +537,16 @@ where .beacon_chain .clone() .ok_or("slasher service requires a beacon chain")?; - let network_send = self - .network_send + let network_senders = self + .network_senders .clone() - .ok_or("slasher service requires a network sender")?; + .ok_or("slasher service requires network senders")?; let context = self .runtime_context .as_ref() .ok_or("slasher requires a runtime_context")? .service_context("slasher_service_ctxt".into()); - SlasherService::new(beacon_chain, network_send).run(&context.executor) + SlasherService::new(beacon_chain, network_senders.network_send()).run(&context.executor) } /// Start the explorer client which periodically sends beacon @@ -616,7 +616,7 @@ where let ctx = Arc::new(http_api::Context { config: self.http_api_config.clone(), chain: self.beacon_chain.clone(), - network_tx: self.network_send.clone(), + network_senders: self.network_senders.clone(), network_globals: self.network_globals.clone(), eth1_service: self.eth1_service.clone(), log: log.clone(), diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 48178f4f0..a21b67417 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -25,11 +25,10 @@ use beacon_chain::{ BeaconChainTypes, ProduceBlockVerification, WhenSlotSkipped, }; pub use block_id::BlockId; -use eth2::types::ValidatorStatus; -use eth2::types::{self as api_types, EndpointVersion, ValidatorId}; +use eth2::types::{self as api_types, EndpointVersion, ValidatorId, ValidatorStatus}; use lighthouse_network::{types::SyncState, EnrExt, NetworkGlobals, PeerId, PubsubMessage}; use lighthouse_version::version_with_platform; -use network::NetworkMessage; +use network::{NetworkMessage, NetworkSenders, ValidatorSubscriptionMessage}; use serde::{Deserialize, Serialize}; use slog::{crit, debug, error, info, warn, Logger}; use slot_clock::SlotClock; @@ -42,7 +41,7 @@ use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; -use tokio::sync::mpsc::UnboundedSender; +use tokio::sync::mpsc::{Sender, UnboundedSender}; use tokio_stream::{wrappers::BroadcastStream, StreamExt}; use types::{ Attestation, AttestationData, AttesterSlashing, BeaconStateError, BlindedPayload, @@ -93,7 +92,7 @@ pub struct TlsConfig { pub struct Context { pub config: Config, pub chain: Option>>, - pub network_tx: Option>>, + pub network_senders: Option>, pub network_globals: Option>>, pub eth1_service: Option, pub log: Logger, @@ -337,14 +336,35 @@ pub fn serve( }); // Create a `warp` filter that provides access to the network sender channel. - let inner_ctx = ctx.clone(); - let network_tx_filter = warp::any() - .map(move || inner_ctx.network_tx.clone()) - .and_then(|network_tx| async move { - match network_tx { - Some(network_tx) => Ok(network_tx), + let network_tx = ctx + .network_senders + .as_ref() + .map(|senders| senders.network_send()); + let network_tx_filter = + warp::any() + .map(move || network_tx.clone()) + .and_then(|network_tx| async move { + match network_tx { + Some(network_tx) => Ok(network_tx), + None => Err(warp_utils::reject::custom_not_found( + "The networking stack has not yet started (network_tx).".to_string(), + )), + } + }); + + // Create a `warp` filter that provides access to the network attestation subscription channel. + let validator_subscriptions_tx = ctx + .network_senders + .as_ref() + .map(|senders| senders.validator_subscription_send()); + let validator_subscription_tx_filter = warp::any() + .map(move || validator_subscriptions_tx.clone()) + .and_then(|validator_subscriptions_tx| async move { + match validator_subscriptions_tx { + Some(validator_subscriptions_tx) => Ok(validator_subscriptions_tx), None => Err(warp_utils::reject::custom_not_found( - "The networking stack has not yet started.".to_string(), + "The networking stack has not yet started (validator_subscription_tx)." + .to_string(), )), } }); @@ -2083,7 +2103,7 @@ pub fn serve( .to_ref() .fork_name(&chain.spec) .map_err(inconsistent_fork_rejection)?; - // Pose as a V2 endpoint so we return the fork `version`. + // Pose as a V2 endpoint so we return the fork `version`. fork_versioned_response(V2, fork_name, block) .map(|response| warp::reply::json(&response)) }, @@ -2345,7 +2365,7 @@ pub fn serve( .and(not_while_syncing_filter.clone()) .and(chain_filter.clone()) .and(warp::body::json()) - .and(network_tx_filter.clone()) + .and(network_tx_filter) .and(log_filter.clone()) .and_then( |chain: Arc>, @@ -2370,12 +2390,14 @@ pub fn serve( .and(warp::path("beacon_committee_subscriptions")) .and(warp::path::end()) .and(warp::body::json()) - .and(network_tx_filter.clone()) + .and(validator_subscription_tx_filter.clone()) .and(chain_filter.clone()) + .and(log_filter.clone()) .and_then( |subscriptions: Vec, - network_tx: UnboundedSender>, - chain: Arc>| { + validator_subscription_tx: Sender, + chain: Arc>, + log: Logger| { blocking_json_task(move || { for subscription in &subscriptions { chain @@ -2383,7 +2405,7 @@ pub fn serve( .write() .auto_register_local_validator(subscription.validator_index); - let subscription = api_types::ValidatorSubscription { + let validator_subscription = api_types::ValidatorSubscription { validator_index: subscription.validator_index, attestation_committee_index: subscription.committee_index, slot: subscription.slot, @@ -2391,12 +2413,20 @@ pub fn serve( is_aggregator: subscription.is_aggregator, }; - publish_network_message( - &network_tx, - NetworkMessage::AttestationSubscribe { - subscriptions: vec![subscription], - }, - )?; + let message = ValidatorSubscriptionMessage::AttestationSubscribe { + subscriptions: vec![validator_subscription], + }; + if let Err(e) = validator_subscription_tx.try_send(message) { + warn!( + log, + "Unable to process committee subscriptions"; + "info" => "the host may be overloaded or resource-constrained", + "error" => ?e, + ); + return Err(warp_utils::reject::custom_server_error( + "unable to queue subscription, host may be overloaded or shutting down".to_string(), + )); + } } Ok(()) @@ -2581,12 +2611,15 @@ pub fn serve( .and(warp::path("sync_committee_subscriptions")) .and(warp::path::end()) .and(warp::body::json()) - .and(network_tx_filter) + .and(validator_subscription_tx_filter) .and(chain_filter.clone()) + .and(log_filter.clone()) .and_then( |subscriptions: Vec, - network_tx: UnboundedSender>, - chain: Arc>| { + validator_subscription_tx: Sender, + chain: Arc>, + log: Logger + | { blocking_json_task(move || { for subscription in subscriptions { chain @@ -2594,12 +2627,20 @@ pub fn serve( .write() .auto_register_local_validator(subscription.validator_index); - publish_network_message( - &network_tx, - NetworkMessage::SyncCommitteeSubscribe { + let message = ValidatorSubscriptionMessage::SyncCommitteeSubscribe { subscriptions: vec![subscription], - }, - )?; + }; + if let Err(e) = validator_subscription_tx.try_send(message) { + warn!( + log, + "Unable to process sync subscriptions"; + "info" => "the host may be overloaded or resource-constrained", + "error" => ?e + ); + return Err(warp_utils::reject::custom_server_error( + "unable to queue subscription, host may be overloaded or shutting down".to_string(), + )); + } } Ok(()) diff --git a/beacon_node/http_api/tests/common.rs b/beacon_node/http_api/tests/common.rs index 1dd7aea92..032e1346f 100644 --- a/beacon_node/http_api/tests/common.rs +++ b/beacon_node/http_api/tests/common.rs @@ -11,14 +11,14 @@ use lighthouse_network::{ types::{EnrAttestationBitfield, EnrSyncCommitteeBitfield, SyncState}, ConnectedPoint, Enr, NetworkGlobals, PeerId, PeerManager, }; -use network::NetworkMessage; +use network::{NetworkReceivers, NetworkSenders}; use sensitive_url::SensitiveUrl; use slog::Logger; use std::future::Future; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::sync::Arc; use std::time::Duration; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::oneshot; use types::{ChainSpec, EthSpec}; pub const TCP_PORT: u16 = 42; @@ -30,7 +30,7 @@ pub const EXTERNAL_ADDR: &str = "/ip4/0.0.0.0/tcp/9000"; pub struct InteractiveTester { pub harness: BeaconChainHarness>, pub client: BeaconNodeHttpClient, - pub network_rx: mpsc::UnboundedReceiver>, + pub network_rx: NetworkReceivers, _server_shutdown: oneshot::Sender<()>, } @@ -41,7 +41,7 @@ pub struct ApiServer> { pub server: SFut, pub listening_socket: SocketAddr, pub shutdown_tx: oneshot::Sender<()>, - pub network_rx: tokio::sync::mpsc::UnboundedReceiver>, + pub network_rx: NetworkReceivers, pub local_enr: Enr, pub external_peer_id: PeerId, } @@ -97,7 +97,7 @@ pub async fn create_api_server_on_port( log: Logger, port: u16, ) -> ApiServer> { - let (network_tx, network_rx) = mpsc::unbounded_channel(); + let (network_senders, network_receivers) = NetworkSenders::new(); // Default metadata let meta_data = MetaData::V2(MetaDataV2 { @@ -146,7 +146,7 @@ pub async fn create_api_server_on_port( spec_fork_name: None, }, chain: Some(chain.clone()), - network_tx: Some(network_tx), + network_senders: Some(network_senders), network_globals: Some(network_globals), eth1_service: Some(eth1_service), log, @@ -163,7 +163,7 @@ pub async fn create_api_server_on_port( server, listening_socket, shutdown_tx, - network_rx, + network_rx: network_receivers, local_enr: enr, external_peer_id: peer_id, } diff --git a/beacon_node/http_api/tests/tests.rs b/beacon_node/http_api/tests/tests.rs index 3144060f1..c8e647be8 100644 --- a/beacon_node/http_api/tests/tests.rs +++ b/beacon_node/http_api/tests/tests.rs @@ -17,14 +17,14 @@ use futures::stream::{Stream, StreamExt}; use futures::FutureExt; use http_api::{BlockId, StateId}; use lighthouse_network::{Enr, EnrExt, PeerId}; -use network::NetworkMessage; +use network::NetworkReceivers; use proto_array::ExecutionStatus; use sensitive_url::SensitiveUrl; use slot_clock::SlotClock; use state_processing::per_slot_processing; use std::convert::TryInto; use std::sync::Arc; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::oneshot; use tokio::time::Duration; use tree_hash::TreeHash; use types::application_domain::ApplicationDomain; @@ -65,7 +65,7 @@ struct ApiTester { proposer_slashing: ProposerSlashing, voluntary_exit: SignedVoluntaryExit, _server_shutdown: oneshot::Sender<()>, - network_rx: mpsc::UnboundedReceiver>, + network_rx: NetworkReceivers, local_enr: Enr, external_peer_id: PeerId, mock_builder: Option>>, @@ -899,7 +899,7 @@ impl ApiTester { self.client.post_beacon_blocks(next_block).await.unwrap(); assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "valid blocks should be sent to network" ); @@ -913,7 +913,7 @@ impl ApiTester { assert!(self.client.post_beacon_blocks(&next_block).await.is_err()); assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "invalid blocks should be sent to network" ); @@ -1041,7 +1041,7 @@ impl ApiTester { .unwrap(); assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "valid attestation should be sent to network" ); @@ -1078,7 +1078,7 @@ impl ApiTester { } assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "if some attestations are valid, we should send them to the network" ); @@ -1108,7 +1108,7 @@ impl ApiTester { .unwrap(); assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "valid attester slashing should be sent to network" ); @@ -1125,7 +1125,7 @@ impl ApiTester { .unwrap_err(); assert!( - self.network_rx.recv().now_or_never().is_none(), + self.network_rx.network_recv.recv().now_or_never().is_none(), "invalid attester slashing should not be sent to network" ); @@ -1154,7 +1154,7 @@ impl ApiTester { .unwrap(); assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "valid proposer slashing should be sent to network" ); @@ -1171,7 +1171,7 @@ impl ApiTester { .unwrap_err(); assert!( - self.network_rx.recv().now_or_never().is_none(), + self.network_rx.network_recv.recv().now_or_never().is_none(), "invalid proposer slashing should not be sent to network" ); @@ -1200,7 +1200,7 @@ impl ApiTester { .unwrap(); assert!( - self.network_rx.recv().await.is_some(), + self.network_rx.network_recv.recv().await.is_some(), "valid exit should be sent to network" ); @@ -1217,7 +1217,7 @@ impl ApiTester { .unwrap_err(); assert!( - self.network_rx.recv().now_or_never().is_none(), + self.network_rx.network_recv.recv().now_or_never().is_none(), "invalid exit should not be sent to network" ); @@ -2351,7 +2351,7 @@ impl ApiTester { .await .unwrap(); - assert!(self.network_rx.recv().await.is_some()); + assert!(self.network_rx.network_recv.recv().await.is_some()); self } @@ -2366,7 +2366,7 @@ impl ApiTester { .await .unwrap_err(); - assert!(self.network_rx.recv().now_or_never().is_none()); + assert!(self.network_rx.network_recv.recv().now_or_never().is_none()); self } @@ -2385,7 +2385,11 @@ impl ApiTester { .await .unwrap(); - self.network_rx.recv().now_or_never().unwrap(); + self.network_rx + .validator_subscription_recv + .recv() + .now_or_never() + .unwrap(); self } diff --git a/beacon_node/network/src/lib.rs b/beacon_node/network/src/lib.rs index 283d8dfb9..648c636ac 100644 --- a/beacon_node/network/src/lib.rs +++ b/beacon_node/network/src/lib.rs @@ -18,4 +18,6 @@ mod subnet_service; mod sync; pub use lighthouse_network::NetworkConfig; -pub use service::{NetworkMessage, NetworkService}; +pub use service::{ + NetworkMessage, NetworkReceivers, NetworkSenders, NetworkService, ValidatorSubscriptionMessage, +}; diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index 3605b94ac..defb9c600 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -252,6 +252,20 @@ lazy_static! { "Gossipsub sync_committee errors per error type", &["type"] ); + + /* + * Network queue metrics + */ + pub static ref NETWORK_RECEIVE_EVENTS: Result = try_create_int_counter_vec( + "network_receive_events", + "Count of events received by the channel to the network service", + &["type"] + ); + pub static ref NETWORK_RECEIVE_TIMES: Result = try_create_histogram_vec( + "network_receive_times", + "Time taken for network to handle an event sent to the network service.", + &["type"] + ); } lazy_static! { diff --git a/beacon_node/network/src/service.rs b/beacon_node/network/src/service.rs index 9e3302af2..f5e32dcff 100644 --- a/beacon_node/network/src/service.rs +++ b/beacon_node/network/src/service.rs @@ -26,6 +26,7 @@ use lighthouse_network::{ use slog::{crit, debug, error, info, o, trace, warn}; use std::{net::SocketAddr, pin::Pin, sync::Arc, time::Duration}; use store::HotColdDB; +use strum::IntoStaticStr; use task_executor::ShutdownReason; use tokio::sync::mpsc; use tokio::time::Sleep; @@ -42,6 +43,9 @@ const METRIC_UPDATE_INTERVAL: u64 = 5; const SUBSCRIBE_DELAY_SLOTS: u64 = 2; /// Delay after a fork where we unsubscribe from pre-fork topics. const UNSUBSCRIBE_DELAY_EPOCHS: u64 = 2; +/// Size of the queue for validator subnet subscriptions. The number is chosen so that we may be +/// able to run tens of thousands of validators on one BN. +const VALIDATOR_SUBSCRIPTION_MESSAGE_QUEUE_SIZE: usize = 65_536; /// Application level requests sent to the network. #[derive(Debug, Clone, Copy)] @@ -51,15 +55,9 @@ pub enum RequestId { } /// Types of messages that the network service can receive. -#[derive(Debug)] +#[derive(Debug, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] pub enum NetworkMessage { - /// Subscribes a list of validators to specific slots for attestation duties. - AttestationSubscribe { - subscriptions: Vec, - }, - SyncCommitteeSubscribe { - subscriptions: Vec, - }, /// Subscribes the beacon node to the core gossipsub topics. We do this when we are either /// synced or close to the head slot. SubscribeCoreTopics, @@ -115,6 +113,59 @@ pub enum NetworkMessage { }, } +/// Messages triggered by validators that may trigger a subscription to a subnet. +/// +/// These messages can be very numerous with large validator counts (hundreds of thousands per +/// minute). Therefore we separate them from the separated from the `NetworkMessage` to provide +/// fairness regarding message processing. +#[derive(Debug, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] +pub enum ValidatorSubscriptionMessage { + /// Subscribes a list of validators to specific slots for attestation duties. + AttestationSubscribe { + subscriptions: Vec, + }, + SyncCommitteeSubscribe { + subscriptions: Vec, + }, +} + +#[derive(Clone)] +pub struct NetworkSenders { + network_send: mpsc::UnboundedSender>, + validator_subscription_send: mpsc::Sender, +} + +pub struct NetworkReceivers { + pub network_recv: mpsc::UnboundedReceiver>, + pub validator_subscription_recv: mpsc::Receiver, +} + +impl NetworkSenders { + pub fn new() -> (Self, NetworkReceivers) { + let (network_send, network_recv) = mpsc::unbounded_channel::>(); + let (validator_subscription_send, validator_subscription_recv) = + mpsc::channel(VALIDATOR_SUBSCRIPTION_MESSAGE_QUEUE_SIZE); + let senders = Self { + network_send, + validator_subscription_send, + }; + let receivers = NetworkReceivers { + network_recv, + validator_subscription_recv, + }; + (senders, receivers) + } + + pub fn network_send(&self) -> mpsc::UnboundedSender> { + self.network_send.clone() + } + + pub fn validator_subscription_send(&self) -> mpsc::Sender { + self.validator_subscription_send.clone() + } +} + /// Service that handles communication between internal services and the `lighthouse_network` network service. pub struct NetworkService { /// A reference to the underlying beacon chain. @@ -127,6 +178,8 @@ pub struct NetworkService { sync_committee_service: SyncCommitteeService, /// The receiver channel for lighthouse to communicate with the network service. network_recv: mpsc::UnboundedReceiver>, + /// The receiver channel for lighthouse to send validator subscription requests. + validator_subscription_recv: mpsc::Receiver, /// The sending channel for the network service to send messages to be routed throughout /// lighthouse. router_send: mpsc::UnboundedSender>, @@ -168,18 +221,15 @@ impl NetworkService { config: &NetworkConfig, executor: task_executor::TaskExecutor, gossipsub_registry: Option<&'_ mut Registry>, - ) -> error::Result<( - Arc>, - mpsc::UnboundedSender>, - )> { + ) -> error::Result<(Arc>, NetworkSenders)> { let network_log = executor.log().clone(); - // build the network channel - let (network_send, network_recv) = mpsc::unbounded_channel::>(); + // build the channels for external comms + let (network_senders, network_recievers) = NetworkSenders::new(); // try and construct UPnP port mappings if required. let upnp_config = crate::nat::UPnPConfig::from(config); let upnp_log = network_log.new(o!("service" => "UPnP")); - let upnp_network_send = network_send.clone(); + let upnp_network_send = network_senders.network_send(); if config.upnp_enabled { executor.spawn_blocking( move || { @@ -244,7 +294,7 @@ impl NetworkService { let router_send = Router::spawn( beacon_chain.clone(), network_globals.clone(), - network_send.clone(), + network_senders.network_send(), executor.clone(), network_log.clone(), )?; @@ -263,6 +313,11 @@ impl NetworkService { // create a timer for updating gossipsub parameters let gossipsub_parameter_update = tokio::time::interval(Duration::from_secs(60)); + let NetworkReceivers { + network_recv, + validator_subscription_recv, + } = network_recievers; + // create the network service and spawn the task let network_log = network_log.new(o!("service" => "network")); let network_service = NetworkService { @@ -271,6 +326,7 @@ impl NetworkService { attestation_service, sync_committee_service, network_recv, + validator_subscription_recv, router_send, store, network_globals: network_globals.clone(), @@ -290,7 +346,7 @@ impl NetworkService { network_service.spawn_service(executor); - Ok((network_globals, network_send)) + Ok((network_globals, network_senders)) } /// Returns the required fork digests that gossipsub needs to subscribe to based on the current slot. @@ -358,6 +414,9 @@ impl NetworkService { // handle a message sent to the network Some(msg) = self.network_recv.recv() => self.on_network_msg(msg, &mut shutdown_sender).await, + // handle a message from a validator requesting a subscription to a subnet + Some(msg) = self.validator_subscription_recv.recv() => self.on_validator_subscription_msg(msg).await, + // process any attestation service events Some(msg) = self.attestation_service.next() => self.on_attestation_service_msg(msg), @@ -505,6 +564,9 @@ impl NetworkService { msg: NetworkMessage, shutdown_sender: &mut Sender, ) { + metrics::inc_counter_vec(&metrics::NETWORK_RECEIVE_EVENTS, &[(&msg).into()]); + let _timer = metrics::start_timer_vec(&metrics::NETWORK_RECEIVE_TIMES, &[(&msg).into()]); + match msg { NetworkMessage::SendRequest { peer_id, @@ -606,22 +668,6 @@ impl NetworkService { reason, source, } => self.libp2p.goodbye_peer(&peer_id, reason, source), - NetworkMessage::AttestationSubscribe { subscriptions } => { - if let Err(e) = self - .attestation_service - .validator_subscriptions(subscriptions) - { - warn!(self.log, "Attestation validator subscription failed"; "error" => e); - } - } - NetworkMessage::SyncCommitteeSubscribe { subscriptions } => { - if let Err(e) = self - .sync_committee_service - .validator_subscriptions(subscriptions) - { - warn!(self.log, "Sync committee calidator subscription failed"; "error" => e); - } - } NetworkMessage::SubscribeCoreTopics => { if self.shutdown_after_sync { if let Err(e) = shutdown_sender @@ -704,6 +750,28 @@ impl NetworkService { } } + /// Handle a message sent to the network service. + async fn on_validator_subscription_msg(&mut self, msg: ValidatorSubscriptionMessage) { + match msg { + ValidatorSubscriptionMessage::AttestationSubscribe { subscriptions } => { + if let Err(e) = self + .attestation_service + .validator_subscriptions(subscriptions) + { + warn!(self.log, "Attestation validator subscription failed"; "error" => e); + } + } + ValidatorSubscriptionMessage::SyncCommitteeSubscribe { subscriptions } => { + if let Err(e) = self + .sync_committee_service + .validator_subscriptions(subscriptions) + { + warn!(self.log, "Sync committee calidator subscription failed"; "error" => e); + } + } + } + } + fn update_gossipsub_parameters(&mut self) { if let Ok(slot) = self.beacon_chain.slot() { let active_validators_opt = self diff --git a/common/eth2/src/types.rs b/common/eth2/src/types.rs index 340d38b85..0f8ec5123 100644 --- a/common/eth2/src/types.rs +++ b/common/eth2/src/types.rs @@ -682,7 +682,7 @@ pub struct ValidatorAggregateAttestationQuery { pub slot: Slot, } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] pub struct BeaconCommitteeSubscription { #[serde(with = "eth2_serde_utils::quoted_u64")] pub validator_index: u64,