Additional networking metrics (#2549)
Adds additional metrics for network monitoring and evaluation. Co-authored-by: Mark Mackey <mark@sigmaprime.io>
This commit is contained in:
parent
60d917d9e9
commit
81c667b58e
821
Cargo.lock
generated
821
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -43,7 +43,7 @@ genesis = { path = "../genesis" }
|
||||
int_to_bytes = { path = "../../consensus/int_to_bytes" }
|
||||
rand = "0.7.3"
|
||||
proto_array = { path = "../../consensus/proto_array" }
|
||||
lru = "0.6.0"
|
||||
lru = "0.7.1"
|
||||
tempfile = "3.1.0"
|
||||
bitvec = "0.19.3"
|
||||
bls = { path = "../../crypto/bls" }
|
||||
|
@ -31,7 +31,7 @@ task_executor = { path = "../../common/task_executor" }
|
||||
environment = { path = "../../lighthouse/environment" }
|
||||
lazy_static = "1.4.0"
|
||||
lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
|
||||
time = "0.3.3"
|
||||
time = "0.3.5"
|
||||
directory = {path = "../../common/directory"}
|
||||
http_api = { path = "../http_api" }
|
||||
http_metrics = { path = "../http_metrics" }
|
||||
|
@ -18,7 +18,7 @@ use eth2::{
|
||||
};
|
||||
use execution_layer::ExecutionLayer;
|
||||
use genesis::{interop_genesis_state, Eth1GenesisService, DEFAULT_ETH1_BLOCK_HASH};
|
||||
use lighthouse_network::NetworkGlobals;
|
||||
use lighthouse_network::{open_metrics_client::registry::Registry, NetworkGlobals};
|
||||
use monitoring_api::{MonitoringHttpClient, ProcessType};
|
||||
use network::{NetworkConfig, NetworkMessage, NetworkService};
|
||||
use slasher::Slasher;
|
||||
@ -65,6 +65,7 @@ pub struct ClientBuilder<T: BeaconChainTypes> {
|
||||
eth1_service: Option<Eth1Service>,
|
||||
network_globals: Option<Arc<NetworkGlobals<T::EthSpec>>>,
|
||||
network_send: Option<UnboundedSender<NetworkMessage<T::EthSpec>>>,
|
||||
gossipsub_registry: Option<Registry>,
|
||||
db_path: Option<PathBuf>,
|
||||
freezer_db_path: Option<PathBuf>,
|
||||
http_api_config: http_api::Config,
|
||||
@ -96,6 +97,7 @@ where
|
||||
eth1_service: None,
|
||||
network_globals: None,
|
||||
network_send: None,
|
||||
gossipsub_registry: None,
|
||||
db_path: None,
|
||||
freezer_db_path: None,
|
||||
http_api_config: <_>::default(),
|
||||
@ -448,13 +450,27 @@ where
|
||||
.ok_or("network requires a runtime_context")?
|
||||
.clone();
|
||||
|
||||
let (network_globals, network_send) =
|
||||
NetworkService::start(beacon_chain, config, context.executor)
|
||||
// If gossipsub metrics are required we build a registry to record them
|
||||
let mut gossipsub_registry = if config.metrics_enabled {
|
||||
Some(Registry::default())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let (network_globals, network_send) = NetworkService::start(
|
||||
beacon_chain,
|
||||
config,
|
||||
context.executor,
|
||||
gossipsub_registry
|
||||
.as_mut()
|
||||
.map(|registry| registry.sub_registry_with_prefix("gossipsub")),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to start network: {:?}", e))?;
|
||||
|
||||
self.network_globals = Some(network_globals);
|
||||
self.network_send = Some(network_send);
|
||||
self.gossipsub_registry = gossipsub_registry;
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
@ -562,13 +578,13 @@ where
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Consumers the builder, returning a `Client` if all necessary components have been
|
||||
/// Consumes the builder, returning a `Client` if all necessary components have been
|
||||
/// specified.
|
||||
///
|
||||
/// If type inference errors are being raised, see the comment on the definition of `Self`.
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn build(
|
||||
self,
|
||||
mut self,
|
||||
) -> Result<Client<Witness<TSlotClock, TEth1Backend, TEthSpec, THotStore, TColdStore>>, String>
|
||||
{
|
||||
let runtime_context = self
|
||||
@ -615,6 +631,7 @@ where
|
||||
chain: self.beacon_chain.clone(),
|
||||
db_path: self.db_path.clone(),
|
||||
freezer_db_path: self.freezer_db_path.clone(),
|
||||
gossipsub_registry: self.gossipsub_registry.take().map(std::sync::Mutex::new),
|
||||
log: log.clone(),
|
||||
});
|
||||
|
||||
|
@ -23,7 +23,7 @@ bytes = "1.1.0"
|
||||
task_executor = { path = "../../common/task_executor" }
|
||||
hex = "0.4.2"
|
||||
eth2_ssz_types = "0.2.2"
|
||||
lru = "0.6.0"
|
||||
lru = "0.7.1"
|
||||
exit-future = "0.2.0"
|
||||
tree_hash = "0.4.1"
|
||||
tree_hash_derive = { path = "../../consensus/tree_hash_derive"}
|
||||
|
@ -2275,6 +2275,22 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
})
|
||||
});
|
||||
|
||||
// GET lighthouse/nat
|
||||
let get_lighthouse_nat = warp::path("lighthouse")
|
||||
.and(warp::path("nat"))
|
||||
.and(warp::path::end())
|
||||
.and_then(|| {
|
||||
blocking_json_task(move || {
|
||||
Ok(api_types::GenericResponse::from(
|
||||
lighthouse_network::metrics::NAT_OPEN
|
||||
.as_ref()
|
||||
.map(|v| v.get())
|
||||
.unwrap_or(0)
|
||||
!= 0,
|
||||
))
|
||||
})
|
||||
});
|
||||
|
||||
// GET lighthouse/peers
|
||||
let get_lighthouse_peers = warp::path("lighthouse")
|
||||
.and(warp::path("peers"))
|
||||
@ -2622,6 +2638,7 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.or(get_validator_sync_committee_contribution.boxed())
|
||||
.or(get_lighthouse_health.boxed())
|
||||
.or(get_lighthouse_syncing.boxed())
|
||||
.or(get_lighthouse_nat.boxed())
|
||||
.or(get_lighthouse_peers.boxed())
|
||||
.or(get_lighthouse_peers_connected.boxed())
|
||||
.or(get_lighthouse_proto_array.boxed())
|
||||
|
@ -4,6 +4,7 @@
|
||||
mod metrics;
|
||||
|
||||
use beacon_chain::{BeaconChain, BeaconChainTypes};
|
||||
use lighthouse_network::open_metrics_client::registry::Registry;
|
||||
use lighthouse_version::version_with_platform;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slog::{crit, info, Logger};
|
||||
@ -39,6 +40,7 @@ pub struct Context<T: BeaconChainTypes> {
|
||||
pub chain: Option<Arc<BeaconChain<T>>>,
|
||||
pub db_path: Option<PathBuf>,
|
||||
pub freezer_db_path: Option<PathBuf>,
|
||||
pub gossipsub_registry: Option<std::sync::Mutex<Registry>>,
|
||||
pub log: Logger,
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
use crate::Context;
|
||||
use beacon_chain::BeaconChainTypes;
|
||||
use lighthouse_metrics::{Encoder, TextEncoder};
|
||||
use lighthouse_network::open_metrics_client::encoding::text::encode;
|
||||
use malloc_utils::scrape_allocator_metrics;
|
||||
|
||||
pub use lighthouse_metrics::*;
|
||||
@ -51,6 +52,12 @@ pub fn gather_prometheus_metrics<T: BeaconChainTypes>(
|
||||
encoder
|
||||
.encode(&lighthouse_metrics::gather(), &mut buffer)
|
||||
.unwrap();
|
||||
// encode gossipsub metrics also if they exist
|
||||
if let Some(registry) = ctx.gossipsub_registry.as_ref() {
|
||||
if let Ok(registry_locked) = registry.lock() {
|
||||
let _ = encode(&mut buffer, ®istry_locked);
|
||||
}
|
||||
}
|
||||
|
||||
String::from_utf8(buffer).map_err(|e| format!("Failed to encode prometheus info: {:?}", e))
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ async fn returns_200_ok() {
|
||||
chain: None,
|
||||
db_path: None,
|
||||
freezer_db_path: None,
|
||||
gossipsub_registry: None,
|
||||
log,
|
||||
});
|
||||
|
||||
|
@ -5,7 +5,7 @@ authors = ["Sigma Prime <contact@sigmaprime.io>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
discv5 = { version = "0.1.0-beta.11", features = ["libp2p"] }
|
||||
discv5 = { version = "0.1.0-beta.13", features = ["libp2p"] }
|
||||
unsigned-varint = { version = "0.6.0", features = ["codec"] }
|
||||
types = { path = "../../consensus/types" }
|
||||
hashset_delay = { path = "../../common/hashset_delay" }
|
||||
@ -25,7 +25,7 @@ lazy_static = "1.4.0"
|
||||
lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
|
||||
smallvec = "1.6.1"
|
||||
tokio-io-timeout = "1.1.1"
|
||||
lru = "0.6.0"
|
||||
lru = "0.7.1"
|
||||
parking_lot = "0.11.0"
|
||||
sha2 = "0.9.1"
|
||||
snap = "1.0.1"
|
||||
@ -38,18 +38,21 @@ directory = { path = "../../common/directory" }
|
||||
regex = "1.3.9"
|
||||
strum = { version = "0.21.0", features = ["derive"] }
|
||||
superstruct = "0.3.0"
|
||||
open-metrics-client = "0.13.0"
|
||||
|
||||
[dependencies.libp2p]
|
||||
version = "0.41.0"
|
||||
# version = "0.41.0"
|
||||
default-features = false
|
||||
features = ["websocket", "identify", "mplex", "yamux", "noise", "gossipsub", "dns-tokio", "tcp-tokio"]
|
||||
git = "https://github.com/libp2p/rust-libp2p"
|
||||
# Latest libp2p master
|
||||
rev = "17861d9cac121f7e448585a7f052d5eab4618826"
|
||||
features = ["websocket", "identify", "mplex", "yamux", "noise", "gossipsub", "dns-tokio", "tcp-tokio", "plaintext"]
|
||||
|
||||
[dev-dependencies]
|
||||
slog-term = "2.6.0"
|
||||
slog-async = "2.5.0"
|
||||
tempfile = "3.1.0"
|
||||
exit-future = "0.2.0"
|
||||
libp2p = { version = "0.41.0", default-features = false, features = ["plaintext"] }
|
||||
void = "1"
|
||||
|
||||
[features]
|
||||
|
@ -8,18 +8,19 @@ use crate::peer_manager::{
|
||||
ConnectionDirection, PeerManager, PeerManagerEvent,
|
||||
};
|
||||
use crate::rpc::*;
|
||||
use crate::service::METADATA_FILENAME;
|
||||
use crate::service::{Context as ServiceContext, METADATA_FILENAME};
|
||||
use crate::types::{
|
||||
subnet_from_topic_hash, GossipEncoding, GossipKind, GossipTopic, SnappyTransform, Subnet,
|
||||
SubnetDiscovery,
|
||||
};
|
||||
use crate::Eth2Enr;
|
||||
use crate::{error, metrics, Enr, NetworkConfig, NetworkGlobals, PubsubMessage, TopicHash};
|
||||
use crate::{error, metrics, Enr, NetworkGlobals, PubsubMessage, TopicHash};
|
||||
use libp2p::{
|
||||
core::{
|
||||
connection::ConnectionId, identity::Keypair, multiaddr::Protocol as MProtocol, Multiaddr,
|
||||
},
|
||||
gossipsub::{
|
||||
metrics::Config as GossipsubMetricsConfig,
|
||||
subscription_filter::{MaxCountSubscriptionFilter, WhitelistSubscriptionFilter},
|
||||
Gossipsub as BaseGossipsub, GossipsubEvent, IdentTopic as Topic, MessageAcceptance,
|
||||
MessageAuthenticity, MessageId,
|
||||
@ -45,7 +46,7 @@ use std::{
|
||||
task::{Context, Poll},
|
||||
};
|
||||
use types::{
|
||||
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT, ChainSpec, EnrForkId, EthSpec, ForkContext,
|
||||
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT, EnrForkId, EthSpec, ForkContext,
|
||||
SignedBeaconBlock, Slot, SubnetId, SyncSubnetId,
|
||||
};
|
||||
|
||||
@ -182,14 +183,14 @@ pub struct Behaviour<TSpec: EthSpec> {
|
||||
impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
pub async fn new(
|
||||
local_key: &Keypair,
|
||||
mut config: NetworkConfig,
|
||||
ctx: ServiceContext<'_>,
|
||||
network_globals: Arc<NetworkGlobals<TSpec>>,
|
||||
log: &slog::Logger,
|
||||
fork_context: Arc<ForkContext>,
|
||||
chain_spec: &ChainSpec,
|
||||
) -> error::Result<Self> {
|
||||
let behaviour_log = log.new(o!());
|
||||
|
||||
let mut config = ctx.config.clone();
|
||||
|
||||
// Set up the Identify Behaviour
|
||||
let identify_config = if config.private {
|
||||
IdentifyConfig::new(
|
||||
@ -215,25 +216,29 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
.eth2()
|
||||
.expect("Local ENR must have a fork id");
|
||||
|
||||
let possible_fork_digests = fork_context.all_fork_digests();
|
||||
let possible_fork_digests = ctx.fork_context.all_fork_digests();
|
||||
let filter = MaxCountSubscriptionFilter {
|
||||
filter: Self::create_whitelist_filter(
|
||||
possible_fork_digests,
|
||||
chain_spec.attestation_subnet_count,
|
||||
ctx.chain_spec.attestation_subnet_count,
|
||||
SYNC_COMMITTEE_SUBNET_COUNT,
|
||||
),
|
||||
max_subscribed_topics: 200,
|
||||
max_subscriptions_per_request: 150, // 148 in theory = (64 attestation + 4 sync committee + 6 core topics) * 2
|
||||
};
|
||||
|
||||
config.gs_config = gossipsub_config(fork_context.clone());
|
||||
config.gs_config = gossipsub_config(ctx.fork_context.clone());
|
||||
|
||||
// If metrics are enabled for gossipsub build the configuration
|
||||
let gossipsub_metrics = ctx
|
||||
.gossipsub_registry
|
||||
.map(|registry| (registry, GossipsubMetricsConfig::default()));
|
||||
|
||||
// Build and configure the Gossipsub behaviour
|
||||
let snappy_transform = SnappyTransform::new(config.gs_config.max_transmit_size());
|
||||
let mut gossipsub = Gossipsub::new_with_subscription_filter_and_transform(
|
||||
MessageAuthenticity::Anonymous,
|
||||
config.gs_config.clone(),
|
||||
None, // No metrics for the time being
|
||||
gossipsub_metrics,
|
||||
filter,
|
||||
snappy_transform,
|
||||
)
|
||||
@ -246,7 +251,7 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
|
||||
let thresholds = lighthouse_gossip_thresholds();
|
||||
|
||||
let score_settings = PeerScoreSettings::new(chain_spec, &config.gs_config);
|
||||
let score_settings = PeerScoreSettings::new(ctx.chain_spec, &config.gs_config);
|
||||
|
||||
// Prepare scoring parameters
|
||||
let params = score_settings.get_peer_score_params(
|
||||
@ -267,6 +272,7 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
|
||||
let peer_manager_cfg = PeerManagerCfg {
|
||||
discovery_enabled: !config.disable_discovery,
|
||||
metrics_enabled: config.metrics_enabled,
|
||||
target_peer_count: config.target_peers,
|
||||
..Default::default()
|
||||
};
|
||||
@ -274,7 +280,7 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
Ok(Behaviour {
|
||||
// Sub-behaviours
|
||||
gossipsub,
|
||||
eth2_rpc: RPC::new(fork_context.clone(), log.clone()),
|
||||
eth2_rpc: RPC::new(ctx.fork_context.clone(), log.clone()),
|
||||
discovery,
|
||||
identify: Identify::new(identify_config),
|
||||
// Auxiliary fields
|
||||
@ -287,7 +293,7 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
network_dir: config.network_dir.clone(),
|
||||
log: behaviour_log,
|
||||
score_settings,
|
||||
fork_context,
|
||||
fork_context: ctx.fork_context,
|
||||
update_gossipsub_scores,
|
||||
})
|
||||
}
|
||||
@ -393,14 +399,15 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
.remove(&topic);
|
||||
|
||||
// unsubscribe from the topic
|
||||
let topic: Topic = topic.into();
|
||||
let libp2p_topic: Topic = topic.clone().into();
|
||||
|
||||
match self.gossipsub.unsubscribe(&topic) {
|
||||
match self.gossipsub.unsubscribe(&libp2p_topic) {
|
||||
Err(_) => {
|
||||
warn!(self.log, "Failed to unsubscribe from topic"; "topic" => %topic);
|
||||
warn!(self.log, "Failed to unsubscribe from topic"; "topic" => %libp2p_topic);
|
||||
false
|
||||
}
|
||||
Ok(v) => {
|
||||
// Inform the network
|
||||
debug!(self.log, "Unsubscribed to topic"; "topic" => %topic);
|
||||
v
|
||||
}
|
||||
@ -732,6 +739,18 @@ impl<TSpec: EthSpec> Behaviour<TSpec> {
|
||||
|
||||
/// Convenience function to propagate a request.
|
||||
fn propagate_request(&mut self, id: PeerRequestId, peer_id: PeerId, request: Request) {
|
||||
// Increment metrics
|
||||
match &request {
|
||||
Request::Status(_) => {
|
||||
metrics::inc_counter_vec(&metrics::TOTAL_RPC_REQUESTS, &["status"])
|
||||
}
|
||||
Request::BlocksByRange { .. } => {
|
||||
metrics::inc_counter_vec(&metrics::TOTAL_RPC_REQUESTS, &["blocks_by_range"])
|
||||
}
|
||||
Request::BlocksByRoot { .. } => {
|
||||
metrics::inc_counter_vec(&metrics::TOTAL_RPC_REQUESTS, &["blocks_by_root"])
|
||||
}
|
||||
}
|
||||
self.add_event(BehaviourEvent::RequestReceived {
|
||||
peer_id,
|
||||
id,
|
||||
|
@ -127,7 +127,7 @@ pub fn use_or_load_enr(
|
||||
pub fn build_or_load_enr<T: EthSpec>(
|
||||
local_key: Keypair,
|
||||
config: &NetworkConfig,
|
||||
enr_fork_id: EnrForkId,
|
||||
enr_fork_id: &EnrForkId,
|
||||
log: &slog::Logger,
|
||||
) -> Result<Enr, String> {
|
||||
// Build the local ENR.
|
||||
@ -163,7 +163,7 @@ pub fn create_enr_builder_from_config<T: EnrKey>(
|
||||
pub fn build_enr<T: EthSpec>(
|
||||
enr_key: &CombinedKey,
|
||||
config: &NetworkConfig,
|
||||
enr_fork_id: EnrForkId,
|
||||
enr_fork_id: &EnrForkId,
|
||||
) -> Result<Enr, String> {
|
||||
let mut builder = create_enr_builder_from_config(config, true);
|
||||
|
||||
|
@ -1039,6 +1039,7 @@ impl<TSpec: EthSpec> NetworkBehaviour for Discovery<TSpec> {
|
||||
Discv5Event::SocketUpdated(socket) => {
|
||||
info!(self.log, "Address updated"; "ip" => %socket.ip(), "udp_port" => %socket.port());
|
||||
metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT);
|
||||
metrics::check_nat();
|
||||
// Discv5 will have updated our local ENR. We save the updated version
|
||||
// to disk.
|
||||
let enr = self.discv5.local_enr();
|
||||
@ -1096,7 +1097,7 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
let enr_key: CombinedKey = CombinedKey::from_libp2p(&keypair).unwrap();
|
||||
let enr: Enr = build_enr::<E>(&enr_key, &config, EnrForkId::default()).unwrap();
|
||||
let enr: Enr = build_enr::<E>(&enr_key, &config, &EnrForkId::default()).unwrap();
|
||||
let log = build_log(slog::Level::Debug, false);
|
||||
let globals = NetworkGlobals::new(
|
||||
enr,
|
||||
|
@ -10,7 +10,7 @@ mod config;
|
||||
|
||||
#[allow(clippy::mutable_key_type)] // PeerId in hashmaps are no longer permitted by clippy
|
||||
pub mod discovery;
|
||||
mod metrics;
|
||||
pub mod metrics;
|
||||
pub mod peer_manager;
|
||||
pub mod rpc;
|
||||
mod service;
|
||||
@ -66,13 +66,16 @@ pub use crate::types::{
|
||||
error, Enr, EnrSyncCommitteeBitfield, GossipTopic, NetworkGlobals, PubsubMessage, Subnet,
|
||||
SubnetDiscovery,
|
||||
};
|
||||
|
||||
pub use open_metrics_client;
|
||||
|
||||
pub use behaviour::{BehaviourEvent, Gossipsub, PeerRequestId, Request, Response};
|
||||
pub use config::Config as NetworkConfig;
|
||||
pub use discovery::{CombinedKeyExt, EnrExt, Eth2Enr};
|
||||
pub use discv5;
|
||||
pub use libp2p;
|
||||
pub use libp2p::bandwidth::BandwidthSinks;
|
||||
pub use libp2p::gossipsub::{MessageAcceptance, MessageId, Topic, TopicHash};
|
||||
pub use libp2p::gossipsub::{IdentTopic, MessageAcceptance, MessageId, Topic, TopicHash};
|
||||
pub use libp2p::{core::ConnectedPoint, PeerId, Swarm};
|
||||
pub use libp2p::{multiaddr, Multiaddr};
|
||||
pub use metrics::scrape_discovery_metrics;
|
||||
@ -82,4 +85,4 @@ pub use peer_manager::{
|
||||
peerdb::PeerDB,
|
||||
ConnectionDirection, PeerConnectionStatus, PeerInfo, PeerManager, SyncInfo, SyncStatus,
|
||||
};
|
||||
pub use service::{load_private_key, Libp2pEvent, Service, NETWORK_KEY_FILENAME};
|
||||
pub use service::{load_private_key, Context, Libp2pEvent, Service, NETWORK_KEY_FILENAME};
|
||||
|
@ -1,16 +1,19 @@
|
||||
pub use lighthouse_metrics::*;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref NAT_OPEN: Result<IntCounter> = try_create_int_counter(
|
||||
"nat_open",
|
||||
"An estimate indicating if the local node is exposed to the internet."
|
||||
);
|
||||
pub static ref ADDRESS_UPDATE_COUNT: Result<IntCounter> = try_create_int_counter(
|
||||
"libp2p_address_update_total",
|
||||
"Count of libp2p socked updated events (when our view of our IP address has changed)"
|
||||
);
|
||||
pub static ref PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
|
||||
"libp2p_peer_connected_peers_total",
|
||||
"libp2p_peers",
|
||||
"Count of libp2p peers currently connected"
|
||||
);
|
||||
pub static ref PEERS_CONNECTED_INTEROP: Result<IntGauge> =
|
||||
try_create_int_gauge("libp2p_peers", "Count of libp2p peers currently connected");
|
||||
|
||||
pub static ref PEER_CONNECT_EVENT_COUNT: Result<IntCounter> = try_create_int_counter(
|
||||
"libp2p_peer_connect_event_total",
|
||||
"Count of libp2p peer connect events (not the current number of connected peers)"
|
||||
@ -19,6 +22,14 @@ lazy_static! {
|
||||
"libp2p_peer_disconnect_event_total",
|
||||
"Count of libp2p peer disconnect events"
|
||||
);
|
||||
pub static ref DISCOVERY_SENT_BYTES: Result<IntGauge> = try_create_int_gauge(
|
||||
"discovery_sent_bytes",
|
||||
"The number of bytes sent in discovery"
|
||||
);
|
||||
pub static ref DISCOVERY_RECV_BYTES: Result<IntGauge> = try_create_int_gauge(
|
||||
"discovery_recv_bytes",
|
||||
"The number of bytes received in discovery"
|
||||
);
|
||||
pub static ref DISCOVERY_QUEUE: Result<IntGauge> = try_create_int_gauge(
|
||||
"discovery_queue_size",
|
||||
"The number of discovery queries awaiting execution"
|
||||
@ -31,11 +42,7 @@ lazy_static! {
|
||||
"discovery_sessions",
|
||||
"The number of active discovery sessions with peers"
|
||||
);
|
||||
pub static ref DISCOVERY_REQS_IP: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"discovery_reqs_per_ip",
|
||||
"Unsolicited discovery requests per ip per second",
|
||||
&["Addresses"]
|
||||
);
|
||||
|
||||
pub static ref PEERS_PER_CLIENT: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"libp2p_peers_per_client",
|
||||
"The connected peers via client implementation",
|
||||
@ -57,6 +64,11 @@ lazy_static! {
|
||||
"RPC errors per client",
|
||||
&["client", "rpc_error", "direction"]
|
||||
);
|
||||
pub static ref TOTAL_RPC_REQUESTS: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"libp2p_rpc_requests_total",
|
||||
"RPC requests total",
|
||||
&["type"]
|
||||
);
|
||||
pub static ref PEER_ACTION_EVENTS_PER_CLIENT: Result<IntCounterVec> =
|
||||
try_create_int_counter_vec(
|
||||
"libp2p_peer_actions_per_client",
|
||||
@ -69,26 +81,57 @@ lazy_static! {
|
||||
"Gossipsub messages that we did not accept, per client",
|
||||
&["client", "validation_result"]
|
||||
);
|
||||
|
||||
pub static ref PEER_SCORE_DISTRIBUTION: Result<IntGaugeVec> =
|
||||
try_create_int_gauge_vec(
|
||||
"peer_score_distribution",
|
||||
"The distribution of connected peer scores",
|
||||
&["position"]
|
||||
);
|
||||
|
||||
pub static ref PEER_SCORE_PER_CLIENT: Result<GaugeVec> =
|
||||
try_create_float_gauge_vec(
|
||||
"peer_score_per_client",
|
||||
"Average score per client",
|
||||
&["client"]
|
||||
);
|
||||
|
||||
/*
|
||||
* Inbound/Outbound peers
|
||||
*/
|
||||
/// The number of peers that dialed us.
|
||||
pub static ref NETWORK_INBOUND_PEERS: Result<IntGauge> =
|
||||
try_create_int_gauge("network_inbound_peers","The number of peers that are currently connected that have dialed us.");
|
||||
|
||||
/// The number of peers that we dialed us.
|
||||
pub static ref NETWORK_OUTBOUND_PEERS: Result<IntGauge> =
|
||||
try_create_int_gauge("network_outbound_peers","The number of peers that are currently connected that we dialed.");
|
||||
}
|
||||
|
||||
/// Checks if we consider the NAT open.
|
||||
///
|
||||
/// Conditions for an open NAT:
|
||||
/// 1. We have 1 or more SOCKET_UPDATED messages. This occurs when discovery has a majority of
|
||||
/// users reporting an external port and our ENR gets updated.
|
||||
/// 2. We have 0 SOCKET_UPDATED messages (can be true if the port was correct on boot), then we
|
||||
/// rely on whether we have any inbound messages. If we have no socket update messages, but
|
||||
/// manage to get at least one inbound peer, we are exposed correctly.
|
||||
pub fn check_nat() {
|
||||
// NAT is already deemed open.
|
||||
if NAT_OPEN.as_ref().map(|v| v.get()).unwrap_or(0) != 0 {
|
||||
return;
|
||||
}
|
||||
if ADDRESS_UPDATE_COUNT.as_ref().map(|v| v.get()).unwrap_or(0) == 0
|
||||
|| NETWORK_INBOUND_PEERS.as_ref().map(|v| v.get()).unwrap_or(0) != 0_i64
|
||||
{
|
||||
inc_counter(&NAT_OPEN);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scrape_discovery_metrics() {
|
||||
let metrics = discv5::metrics::Metrics::from(discv5::Discv5::raw_metrics());
|
||||
|
||||
set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second);
|
||||
|
||||
set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64);
|
||||
|
||||
let process_gauge_vec = |gauge: &Result<GaugeVec>, metrics: discv5::metrics::Metrics| {
|
||||
if let Ok(gauge_vec) = gauge {
|
||||
gauge_vec.reset();
|
||||
for (ip, value) in metrics.requests_per_ip_per_second.iter() {
|
||||
if let Ok(metric) = gauge_vec.get_metric_with_label_values(&[&format!("{:?}", ip)])
|
||||
{
|
||||
metric.set(*value);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
process_gauge_vec(&DISCOVERY_REQS_IP, metrics);
|
||||
set_gauge(&DISCOVERY_SENT_BYTES, metrics.bytes_sent as i64);
|
||||
set_gauge(&DISCOVERY_RECV_BYTES, metrics.bytes_recv as i64);
|
||||
}
|
||||
|
@ -16,6 +16,8 @@ pub struct Config {
|
||||
/* Peer count related configurations */
|
||||
/// Whether discovery is enabled.
|
||||
pub discovery_enabled: bool,
|
||||
/// Whether metrics are enabled.
|
||||
pub metrics_enabled: bool,
|
||||
/// Target number of peers to connect to.
|
||||
pub target_peer_count: usize,
|
||||
|
||||
@ -34,6 +36,7 @@ impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Config {
|
||||
discovery_enabled: true,
|
||||
metrics_enabled: false,
|
||||
target_peer_count: DEFAULT_TARGET_PEERS,
|
||||
status_interval: DEFAULT_STATUS_INTERVAL,
|
||||
ping_interval_inbound: DEFAULT_PING_INTERVAL_INBOUND,
|
||||
|
@ -8,13 +8,14 @@ use crate::{Subnet, SubnetDiscovery};
|
||||
use discv5::Enr;
|
||||
use hashset_delay::HashSetDelay;
|
||||
use libp2p::identify::IdentifyInfo;
|
||||
use peerdb::{BanOperation, BanResult, ScoreUpdateResult};
|
||||
use peerdb::{client::ClientKind, BanOperation, BanResult, ScoreUpdateResult};
|
||||
use slog::{debug, error, warn};
|
||||
use smallvec::SmallVec;
|
||||
use std::{
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use strum::IntoEnumIterator;
|
||||
use types::{EthSpec, SyncSubnetId};
|
||||
|
||||
pub use libp2p::core::{identity::Keypair, Multiaddr};
|
||||
@ -71,6 +72,8 @@ pub struct PeerManager<TSpec: EthSpec> {
|
||||
heartbeat: tokio::time::Interval,
|
||||
/// Keeps track of whether the discovery service is enabled or not.
|
||||
discovery_enabled: bool,
|
||||
/// Keeps track if the current instance is reporting metrics or not.
|
||||
metrics_enabled: bool,
|
||||
/// The logger associated with the `PeerManager`.
|
||||
log: slog::Logger,
|
||||
}
|
||||
@ -111,6 +114,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
) -> error::Result<Self> {
|
||||
let config::Config {
|
||||
discovery_enabled,
|
||||
metrics_enabled,
|
||||
target_peer_count,
|
||||
status_interval,
|
||||
ping_interval_inbound,
|
||||
@ -130,6 +134,7 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
sync_committee_subnets: Default::default(),
|
||||
heartbeat,
|
||||
discovery_enabled,
|
||||
metrics_enabled,
|
||||
log: log.clone(),
|
||||
})
|
||||
}
|
||||
@ -378,19 +383,21 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
"protocols" => ?info.protocols
|
||||
);
|
||||
|
||||
// update the peer client kind metric
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
// update the peer client kind metric if the peer is connected
|
||||
if matches!(
|
||||
peer_info.connection_status(),
|
||||
PeerConnectionStatus::Connected { .. }
|
||||
| PeerConnectionStatus::Disconnecting { .. }
|
||||
) {
|
||||
metrics::inc_gauge_vec(
|
||||
&metrics::PEERS_PER_CLIENT,
|
||||
&[&peer_info.client().kind.to_string()],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
if let Some(v) = metrics::get_int_gauge(
|
||||
);
|
||||
metrics::dec_gauge_vec(
|
||||
&metrics::PEERS_PER_CLIENT,
|
||||
&[&previous_kind.to_string()],
|
||||
) {
|
||||
v.dec()
|
||||
};
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!(self.log, "Received an Identify response from an unknown peer"; "peer_id" => peer_id.to_string());
|
||||
@ -606,6 +613,46 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
}
|
||||
}
|
||||
|
||||
// This function updates metrics for all connected peers.
|
||||
fn update_connected_peer_metrics(&self) {
|
||||
// Do nothing if we don't have metrics enabled.
|
||||
if !self.metrics_enabled {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut connected_peer_count = 0;
|
||||
let mut inbound_connected_peers = 0;
|
||||
let mut outbound_connected_peers = 0;
|
||||
let mut clients_per_peer = HashMap::new();
|
||||
|
||||
for (_peer, peer_info) in self.network_globals.peers.read().connected_peers() {
|
||||
connected_peer_count += 1;
|
||||
if let PeerConnectionStatus::Connected { n_in, .. } = peer_info.connection_status() {
|
||||
if *n_in > 0 {
|
||||
inbound_connected_peers += 1;
|
||||
} else {
|
||||
outbound_connected_peers += 1;
|
||||
}
|
||||
}
|
||||
*clients_per_peer
|
||||
.entry(peer_info.client().kind.to_string())
|
||||
.or_default() += 1;
|
||||
}
|
||||
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peer_count);
|
||||
metrics::set_gauge(&metrics::NETWORK_INBOUND_PEERS, inbound_connected_peers);
|
||||
metrics::set_gauge(&metrics::NETWORK_OUTBOUND_PEERS, outbound_connected_peers);
|
||||
|
||||
for client_kind in ClientKind::iter() {
|
||||
let value = clients_per_peer.get(&client_kind.to_string()).unwrap_or(&0);
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEERS_PER_CLIENT,
|
||||
&[&client_kind.to_string()],
|
||||
*value as i64,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/* Internal functions */
|
||||
|
||||
/// Sets a peer as connected as long as their reputation allows it
|
||||
@ -705,22 +752,6 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
// increment prometheus metrics
|
||||
metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT);
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peers);
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED_INTEROP, connected_peers);
|
||||
|
||||
// Increment the PEERS_PER_CLIENT metric
|
||||
if let Some(kind) = self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(peer_id)
|
||||
.map(|peer_info| peer_info.client().kind.clone())
|
||||
{
|
||||
if let Some(v) =
|
||||
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
|
||||
{
|
||||
v.inc()
|
||||
};
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
@ -802,6 +833,9 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.handle_score_action(&peer_id, action, None);
|
||||
}
|
||||
|
||||
// Update peer score metrics;
|
||||
self.update_peer_score_metrics();
|
||||
|
||||
// Maintain minimum count for sync committee peers.
|
||||
self.maintain_sync_committee_peers();
|
||||
|
||||
@ -840,6 +874,75 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
|
||||
self.disconnect_peer(peer_id, GoodbyeReason::TooManyPeers);
|
||||
}
|
||||
}
|
||||
|
||||
// Update metrics related to peer scoring.
|
||||
fn update_peer_score_metrics(&self) {
|
||||
if !self.metrics_enabled {
|
||||
return;
|
||||
}
|
||||
// reset the gauges
|
||||
let _ = metrics::PEER_SCORE_DISTRIBUTION
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = metrics::PEER_SCORE_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
|
||||
let mut avg_score_per_client: HashMap<String, (f64, usize)> = HashMap::with_capacity(5);
|
||||
{
|
||||
let peers_db_read_lock = self.network_globals.peers.read();
|
||||
let connected_peers = peers_db_read_lock.best_peers_by_status(PeerInfo::is_connected);
|
||||
let total_peers = connected_peers.len();
|
||||
for (id, (_peer, peer_info)) in connected_peers.into_iter().enumerate() {
|
||||
// First quartile
|
||||
if id == 0 {
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEER_SCORE_DISTRIBUTION,
|
||||
&["1st"],
|
||||
peer_info.score().score() as i64,
|
||||
);
|
||||
} else if id == (total_peers * 3 / 4).saturating_sub(1) {
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEER_SCORE_DISTRIBUTION,
|
||||
&["3/4"],
|
||||
peer_info.score().score() as i64,
|
||||
);
|
||||
} else if id == (total_peers / 2).saturating_sub(1) {
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEER_SCORE_DISTRIBUTION,
|
||||
&["1/2"],
|
||||
peer_info.score().score() as i64,
|
||||
);
|
||||
} else if id == (total_peers / 4).saturating_sub(1) {
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEER_SCORE_DISTRIBUTION,
|
||||
&["1/4"],
|
||||
peer_info.score().score() as i64,
|
||||
);
|
||||
} else if id == total_peers.saturating_sub(1) {
|
||||
metrics::set_gauge_vec(
|
||||
&metrics::PEER_SCORE_DISTRIBUTION,
|
||||
&["last"],
|
||||
peer_info.score().score() as i64,
|
||||
);
|
||||
}
|
||||
|
||||
let mut score_peers: &mut (f64, usize) = avg_score_per_client
|
||||
.entry(peer_info.client().kind.to_string())
|
||||
.or_default();
|
||||
score_peers.0 += peer_info.score().score();
|
||||
score_peers.1 += 1;
|
||||
}
|
||||
} // read lock ended
|
||||
|
||||
for (client, (score, peers)) in avg_score_per_client {
|
||||
metrics::set_float_gauge_vec(
|
||||
&metrics::PEER_SCORE_PER_CLIENT,
|
||||
&[&client.to_string()],
|
||||
score / (peers as f64),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum ConnectingType {
|
||||
|
@ -111,8 +111,11 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
|
||||
endpoint: &ConnectedPoint,
|
||||
_failed_addresses: Option<&Vec<Multiaddr>>,
|
||||
) {
|
||||
// Log the connection
|
||||
debug!(self.log, "Connection established"; "peer_id" => %peer_id, "connection" => ?endpoint.to_endpoint());
|
||||
// Check NAT if metrics are enabled
|
||||
if self.network_globals.local_enr.read().udp().is_some() {
|
||||
metrics::check_nat();
|
||||
}
|
||||
|
||||
// Check to make sure the peer is not supposed to be banned
|
||||
match self.ban_status(peer_id) {
|
||||
@ -150,10 +153,8 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
|
||||
return;
|
||||
}
|
||||
|
||||
// Register the newly connected peer (regardless if we are about to disconnect them).
|
||||
// NOTE: We don't register peers that we are disconnecting immediately. The network service
|
||||
// does not need to know about these peers.
|
||||
// let enr
|
||||
match endpoint {
|
||||
ConnectedPoint::Listener { send_back_addr, .. } => {
|
||||
self.inject_connect_ingoing(peer_id, send_back_addr.clone(), None);
|
||||
@ -167,12 +168,9 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
|
||||
}
|
||||
}
|
||||
|
||||
let connected_peers = self.network_globals.connected_peers() as i64;
|
||||
|
||||
// increment prometheus metrics
|
||||
self.update_connected_peer_metrics();
|
||||
metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT);
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peers);
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED_INTEROP, connected_peers);
|
||||
}
|
||||
|
||||
fn inject_disconnected(&mut self, peer_id: &PeerId) {
|
||||
@ -190,21 +188,6 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
|
||||
self.events
|
||||
.push(PeerManagerEvent::PeerDisconnected(*peer_id));
|
||||
debug!(self.log, "Peer disconnected"; "peer_id" => %peer_id);
|
||||
|
||||
// Decrement the PEERS_PER_CLIENT metric
|
||||
if let Some(kind) = self
|
||||
.network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(peer_id)
|
||||
.map(|info| info.client().kind.clone())
|
||||
{
|
||||
if let Some(v) =
|
||||
metrics::get_int_gauge(&metrics::PEERS_PER_CLIENT, &[&kind.to_string()])
|
||||
{
|
||||
v.dec()
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: It may be the case that a rejected node, due to too many peers is disconnected
|
||||
@ -212,12 +195,9 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
|
||||
// reference so that peer manager can track this peer.
|
||||
self.inject_disconnect(peer_id);
|
||||
|
||||
let connected_peers = self.network_globals.connected_peers() as i64;
|
||||
|
||||
// Update the prometheus metrics
|
||||
self.update_connected_peer_metrics();
|
||||
metrics::inc_counter(&metrics::PEER_DISCONNECT_EVENT_COUNT);
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peers);
|
||||
metrics::set_gauge(&metrics::PEERS_CONNECTED_INTEROP, connected_peers);
|
||||
}
|
||||
|
||||
fn inject_address_change(
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
use libp2p::identify::IdentifyInfo;
|
||||
use serde::Serialize;
|
||||
use strum::{AsRefStr, AsStaticStr};
|
||||
use strum::{AsRefStr, AsStaticStr, EnumIter};
|
||||
|
||||
/// Various client and protocol information related to a node.
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
@ -21,7 +21,7 @@ pub struct Client {
|
||||
pub agent_string: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, PartialEq, AsRefStr, AsStaticStr)]
|
||||
#[derive(Clone, Debug, Serialize, PartialEq, AsRefStr, AsStaticStr, EnumIter)]
|
||||
pub enum ClientKind {
|
||||
/// A lighthouse node (the best kind).
|
||||
Lighthouse,
|
||||
|
@ -19,8 +19,6 @@ use PeerConnectionStatus::*;
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
#[serde(bound = "T: EthSpec")]
|
||||
pub struct PeerInfo<T: EthSpec> {
|
||||
/// The connection status of the peer
|
||||
_status: PeerStatus,
|
||||
/// The peers reputation
|
||||
score: Score,
|
||||
/// Client managing this peer
|
||||
@ -57,7 +55,6 @@ pub struct PeerInfo<T: EthSpec> {
|
||||
impl<TSpec: EthSpec> Default for PeerInfo<TSpec> {
|
||||
fn default() -> PeerInfo<TSpec> {
|
||||
PeerInfo {
|
||||
_status: Default::default(),
|
||||
score: Score::default(),
|
||||
client: Client::default(),
|
||||
connection_status: Default::default(),
|
||||
@ -387,21 +384,6 @@ impl<T: EthSpec> PeerInfo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
/// The current health status of the peer.
|
||||
pub enum PeerStatus {
|
||||
/// The peer is healthy.
|
||||
Healthy,
|
||||
/// The peer is clogged. It has not been responding to requests on time.
|
||||
_Clogged,
|
||||
}
|
||||
|
||||
impl Default for PeerStatus {
|
||||
fn default() -> Self {
|
||||
PeerStatus::Healthy
|
||||
}
|
||||
}
|
||||
|
||||
/// Connection Direction of connection.
|
||||
#[derive(Debug, Clone, Serialize, AsRefStr)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
|
@ -20,6 +20,7 @@ use libp2p::{
|
||||
swarm::{SwarmBuilder, SwarmEvent},
|
||||
PeerId, Swarm, Transport,
|
||||
};
|
||||
use open_metrics_client::registry::Registry;
|
||||
use slog::{crit, debug, info, o, trace, warn, Logger};
|
||||
use ssz::Decode;
|
||||
use std::fs::File;
|
||||
@ -62,27 +63,34 @@ pub struct Service<TSpec: EthSpec> {
|
||||
pub log: Logger,
|
||||
}
|
||||
|
||||
pub struct Context<'a> {
|
||||
pub config: &'a NetworkConfig,
|
||||
pub enr_fork_id: EnrForkId,
|
||||
pub fork_context: Arc<ForkContext>,
|
||||
pub chain_spec: &'a ChainSpec,
|
||||
pub gossipsub_registry: Option<&'a mut Registry>,
|
||||
}
|
||||
|
||||
impl<TSpec: EthSpec> Service<TSpec> {
|
||||
pub async fn new(
|
||||
executor: task_executor::TaskExecutor,
|
||||
config: &NetworkConfig,
|
||||
enr_fork_id: EnrForkId,
|
||||
ctx: Context<'_>,
|
||||
log: &Logger,
|
||||
fork_context: Arc<ForkContext>,
|
||||
chain_spec: &ChainSpec,
|
||||
) -> error::Result<(Arc<NetworkGlobals<TSpec>>, Self)> {
|
||||
let log = log.new(o!("service"=> "libp2p"));
|
||||
trace!(log, "Libp2p Service starting");
|
||||
|
||||
let config = ctx.config;
|
||||
// initialise the node's ID
|
||||
let local_keypair = load_private_key(config, &log);
|
||||
|
||||
// Create an ENR or load from disk if appropriate
|
||||
let enr =
|
||||
enr::build_or_load_enr::<TSpec>(local_keypair.clone(), config, enr_fork_id, &log)?;
|
||||
enr::build_or_load_enr::<TSpec>(local_keypair.clone(), config, &ctx.enr_fork_id, &log)?;
|
||||
|
||||
let local_peer_id = enr.peer_id();
|
||||
|
||||
// Construct the metadata
|
||||
let meta_data = load_or_build_metadata(&config.network_dir, &log);
|
||||
|
||||
// set up a collection of variables accessible outside of the network crate
|
||||
@ -113,15 +121,8 @@ impl<TSpec: EthSpec> Service<TSpec> {
|
||||
.map_err(|e| format!("Failed to build transport: {:?}", e))?;
|
||||
|
||||
// Lighthouse network behaviour
|
||||
let behaviour = Behaviour::new(
|
||||
&local_keypair,
|
||||
config.clone(),
|
||||
network_globals.clone(),
|
||||
&log,
|
||||
fork_context,
|
||||
chain_spec,
|
||||
)
|
||||
.await?;
|
||||
let behaviour =
|
||||
Behaviour::new(&local_keypair, ctx, network_globals.clone(), &log).await?;
|
||||
|
||||
// use the executor for libp2p
|
||||
struct Executor(task_executor::TaskExecutor);
|
||||
|
@ -128,16 +128,15 @@ pub async fn build_libp2p_instance(
|
||||
let (signal, exit) = exit_future::signal();
|
||||
let (shutdown_tx, _) = futures::channel::mpsc::channel(1);
|
||||
let executor = task_executor::TaskExecutor::new(rt, exit, log.clone(), shutdown_tx);
|
||||
let fork_context = Arc::new(fork_context());
|
||||
let libp2p_context = lighthouse_network::Context {
|
||||
config: &config,
|
||||
enr_fork_id: EnrForkId::default(),
|
||||
fork_context: Arc::new(fork_context()),
|
||||
chain_spec: &ChainSpec::minimal(),
|
||||
gossipsub_registry: None,
|
||||
};
|
||||
Libp2pInstance(
|
||||
LibP2PService::new(
|
||||
executor,
|
||||
&config,
|
||||
EnrForkId::default(),
|
||||
&log,
|
||||
fork_context,
|
||||
&ChainSpec::minimal(),
|
||||
)
|
||||
LibP2PService::new(executor, libp2p_context, &log)
|
||||
.await
|
||||
.expect("should build libp2p instance")
|
||||
.1,
|
||||
|
@ -4,216 +4,42 @@ use beacon_chain::{
|
||||
};
|
||||
use fnv::FnvHashMap;
|
||||
pub use lighthouse_metrics::*;
|
||||
use lighthouse_network::PubsubMessage;
|
||||
use lighthouse_network::{
|
||||
types::GossipKind, BandwidthSinks, GossipTopic, Gossipsub, NetworkGlobals, TopicHash,
|
||||
types::GossipKind, BandwidthSinks, GossipTopic, Gossipsub, NetworkGlobals,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::sync::Arc;
|
||||
use strum::AsStaticRef;
|
||||
use types::{
|
||||
consts::altair::SYNC_COMMITTEE_SUBNET_COUNT, subnet_id::subnet_id_to_string,
|
||||
sync_subnet_id::sync_subnet_id_to_string, EthSpec,
|
||||
};
|
||||
use types::EthSpec;
|
||||
|
||||
lazy_static! {
|
||||
|
||||
/*
|
||||
* Gossip subnets and scoring
|
||||
*/
|
||||
pub static ref PEERS_PER_PROTOCOL: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_peers_per_protocol",
|
||||
"Peers via supported protocol",
|
||||
&["protocol"]
|
||||
);
|
||||
|
||||
pub static ref GOSSIPSUB_SUBSCRIBED_ATTESTATION_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_subscribed_attestation_subnets",
|
||||
"Attestation subnets currently subscribed to",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref GOSSIPSUB_SUBSCRIBED_SYNC_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_subscribed_sync_subnets",
|
||||
"Sync subnets currently subscribed to",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref GOSSIPSUB_SUBSCRIBED_PEERS_ATTESTATION_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_peers_per_attestation_subnet_topic_count",
|
||||
"Peers subscribed per attestation subnet topic",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref GOSSIPSUB_SUBSCRIBED_PEERS_SYNC_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_peers_per_sync_subnet_topic_count",
|
||||
"Peers subscribed per sync subnet topic",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref MESH_PEERS_PER_MAIN_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_mesh_peers_per_main_topic",
|
||||
"Mesh peers per main topic",
|
||||
&["topic_hash"]
|
||||
);
|
||||
|
||||
pub static ref MESH_PEERS_PER_ATTESTATION_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_mesh_peers_per_subnet_topic",
|
||||
"Mesh peers per subnet topic",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref MESH_PEERS_PER_SYNC_SUBNET_TOPIC: Result<IntGaugeVec> = try_create_int_gauge_vec(
|
||||
"gossipsub_mesh_peers_per_subnet_topic",
|
||||
"Mesh peers per subnet topic",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_avg_peer_score_per_topic",
|
||||
"Average peer's score per topic",
|
||||
&["topic_hash"]
|
||||
);
|
||||
|
||||
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_avg_peer_score_per_attestation_subnet_topic",
|
||||
"Average peer's score per attestation subnet topic",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_avg_peer_score_per_sync_subnet_topic",
|
||||
"Average peer's score per sync committee subnet topic",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT: Result<IntCounterVec> = try_create_int_counter_vec(
|
||||
"gossipsub_attestations_published_per_subnet_per_slot",
|
||||
"Failed attestation publishes per subnet",
|
||||
&["subnet"]
|
||||
);
|
||||
|
||||
pub static ref SCORES_BELOW_ZERO_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_scores_below_zero_per_client",
|
||||
"Relative number of scores below zero per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_scores_below_gossip_threshold_per_client",
|
||||
"Relative number of scores below gossip threshold per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_scores_below_publish_threshold_per_client",
|
||||
"Relative number of scores below publish threshold per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_scores_below_greylist_threshold_per_client",
|
||||
"Relative number of scores below greylist threshold per client",
|
||||
&["Client"]
|
||||
);
|
||||
|
||||
pub static ref MIN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_min_scores_per_client",
|
||||
"Minimum scores per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref MEDIAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_median_scores_per_client",
|
||||
"Median scores per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref MEAN_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_mean_scores_per_client",
|
||||
"Mean scores per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref MAX_SCORES_PER_CLIENT: Result<GaugeVec> = try_create_float_gauge_vec(
|
||||
"gossipsub_max_scores_per_client",
|
||||
"Max scores per client",
|
||||
&["Client"]
|
||||
);
|
||||
pub static ref BEACON_BLOCK_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
|
||||
try_create_int_gauge_vec(
|
||||
"block_mesh_peers_per_client",
|
||||
"Number of mesh peers for BeaconBlock topic per client",
|
||||
&["Client"]
|
||||
);
|
||||
|
||||
pub static ref BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT: Result<IntGaugeVec> =
|
||||
try_create_int_gauge_vec(
|
||||
"beacon_aggregate_and_proof_mesh_peers_per_client",
|
||||
"Number of mesh peers for BeaconAggregateAndProof topic per client",
|
||||
&["Client"]
|
||||
);
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/*
|
||||
* Gossip Rx
|
||||
*/
|
||||
pub static ref GOSSIP_BLOCKS_RX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_blocks_rx_total",
|
||||
"Count of gossip blocks received"
|
||||
);
|
||||
pub static ref GOSSIP_UNAGGREGATED_ATTESTATIONS_RX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_unaggregated_attestations_rx_total",
|
||||
"Count of gossip unaggregated attestations received"
|
||||
);
|
||||
pub static ref GOSSIP_AGGREGATED_ATTESTATIONS_RX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_aggregated_attestations_rx_total",
|
||||
"Count of gossip aggregated attestations received"
|
||||
);
|
||||
pub static ref GOSSIP_SYNC_COMMITTEE_MESSAGE_RX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_sync_committee_message_rx_total",
|
||||
"Count of gossip sync committee messages received"
|
||||
);
|
||||
pub static ref GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_RX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_sync_committee_contribution_received_total",
|
||||
"Count of gossip sync committee contributions received"
|
||||
);
|
||||
|
||||
|
||||
/*
|
||||
* Gossip Tx
|
||||
*/
|
||||
pub static ref GOSSIP_BLOCKS_TX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_blocks_tx_total",
|
||||
"Count of gossip blocks transmitted"
|
||||
);
|
||||
pub static ref GOSSIP_UNAGGREGATED_ATTESTATIONS_TX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_unaggregated_attestations_tx_total",
|
||||
"Count of gossip unaggregated attestations transmitted"
|
||||
);
|
||||
pub static ref GOSSIP_AGGREGATED_ATTESTATIONS_TX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_aggregated_attestations_tx_total",
|
||||
"Count of gossip aggregated attestations transmitted"
|
||||
);
|
||||
pub static ref GOSSIP_SYNC_COMMITTEE_MESSAGE_TX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_sync_committee_message_tx_total",
|
||||
"Count of gossip sync committee messages transmitted"
|
||||
);
|
||||
pub static ref GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_TX: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_sync_committee_contribution_tx_total",
|
||||
"Count of gossip sync committee contributions transmitted"
|
||||
);
|
||||
|
||||
/*
|
||||
* Attestation subnet subscriptions
|
||||
*/
|
||||
pub static ref SUBNET_SUBSCRIPTION_REQUESTS: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_attestation_subnet_subscriptions_total",
|
||||
"validator_attestation_subnet_subscriptions_total",
|
||||
"Count of validator attestation subscription requests."
|
||||
);
|
||||
pub static ref SUBNET_SUBSCRIPTION_AGGREGATOR_REQUESTS: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_subnet_subscriptions_aggregator_total",
|
||||
"validator_subnet_subscriptions_aggregator_total",
|
||||
"Count of validator subscription requests where the subscriber is an aggregator."
|
||||
);
|
||||
|
||||
/*
|
||||
* Sync committee subnet subscriptions
|
||||
*/
|
||||
pub static ref SYNC_COMMITTEE_SUBSCRIPTION_REQUESTS: Result<IntCounter> = try_create_int_counter(
|
||||
"gossipsub_sync_committee_subnet_subscriptions_total",
|
||||
"validator_sync_committee_subnet_subscriptions_total",
|
||||
"Count of validator sync committee subscription requests."
|
||||
);
|
||||
|
||||
@ -406,14 +232,13 @@ lazy_static! {
|
||||
"beacon_processor_sync_contribution_verified_total",
|
||||
"Total number of sync committee contributions verified for gossip."
|
||||
);
|
||||
|
||||
pub static ref BEACON_PROCESSOR_SYNC_CONTRIBUTION_IMPORTED_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||
"beacon_processor_sync_contribution_imported_total",
|
||||
"Total number of sync committee contributions imported to fork choice, etc."
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/// Errors and Debugging Stats
|
||||
pub static ref GOSSIP_ATTESTATION_ERRORS_PER_TYPE: Result<IntCounterVec> =
|
||||
try_create_int_counter_vec(
|
||||
"gossipsub_attestation_errors_per_type",
|
||||
@ -426,8 +251,16 @@ lazy_static! {
|
||||
"Gossipsub sync_committee errors per error type",
|
||||
&["type"]
|
||||
);
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
|
||||
/*
|
||||
* Bandwidth metrics
|
||||
*/
|
||||
pub static ref INBOUND_LIBP2P_BYTES: Result<IntGauge> =
|
||||
try_create_int_gauge("libp2p_inbound_bytes", "The inbound bandwidth over libp2p");
|
||||
|
||||
pub static ref OUTBOUND_LIBP2P_BYTES: Result<IntGauge> = try_create_int_gauge(
|
||||
"libp2p_outbound_bytes",
|
||||
"The outbound bandwidth over libp2p"
|
||||
@ -436,18 +269,8 @@ lazy_static! {
|
||||
"libp2p_total_bandwidth",
|
||||
"The total inbound/outbound bandwidth over libp2p"
|
||||
);
|
||||
}
|
||||
|
||||
pub fn update_bandwidth_metrics(bandwidth: Arc<BandwidthSinks>) {
|
||||
set_gauge(&INBOUND_LIBP2P_BYTES, bandwidth.total_inbound() as i64);
|
||||
set_gauge(&OUTBOUND_LIBP2P_BYTES, bandwidth.total_outbound() as i64);
|
||||
set_gauge(
|
||||
&TOTAL_LIBP2P_BANDWIDTH,
|
||||
(bandwidth.total_inbound() + bandwidth.total_outbound()) as i64,
|
||||
);
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/*
|
||||
* Sync related metrics
|
||||
*/
|
||||
@ -489,11 +312,21 @@ lazy_static! {
|
||||
);
|
||||
pub static ref BEACON_PROCESSOR_REPROCESSING_QUEUE_EXPIRED_ATTESTATIONS: Result<IntCounter> = try_create_int_counter(
|
||||
"beacon_processor_reprocessing_queue_expired_attestations",
|
||||
"Number of queued attestations which have expired before a matching block has been found"
|
||||
"Number of queued attestations which have expired before a matching block has been found."
|
||||
);
|
||||
pub static ref BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_ATTESTATIONS: Result<IntCounter> = try_create_int_counter(
|
||||
"beacon_processor_reprocessing_queue_matched_attestations",
|
||||
"Number of queued attestations where as matching block has been imported"
|
||||
"Number of queued attestations where as matching block has been imported."
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
pub fn update_bandwidth_metrics(bandwidth: Arc<BandwidthSinks>) {
|
||||
set_gauge(&INBOUND_LIBP2P_BYTES, bandwidth.total_inbound() as i64);
|
||||
set_gauge(&OUTBOUND_LIBP2P_BYTES, bandwidth.total_outbound() as i64);
|
||||
set_gauge(
|
||||
&TOTAL_LIBP2P_BANDWIDTH,
|
||||
(bandwidth.total_inbound() + bandwidth.total_outbound()) as i64,
|
||||
);
|
||||
}
|
||||
|
||||
@ -505,307 +338,23 @@ pub fn register_sync_committee_error(error: &SyncCommitteeError) {
|
||||
inc_counter_vec(&GOSSIP_SYNC_COMMITTEE_ERRORS_PER_TYPE, &[error.as_ref()]);
|
||||
}
|
||||
|
||||
/// Inspects the `messages` that were being sent to the network and updates Prometheus metrics.
|
||||
pub fn expose_publish_metrics<T: EthSpec>(messages: &[PubsubMessage<T>]) {
|
||||
for message in messages {
|
||||
match message {
|
||||
PubsubMessage::BeaconBlock(_) => inc_counter(&GOSSIP_BLOCKS_TX),
|
||||
PubsubMessage::Attestation(subnet_id) => {
|
||||
inc_counter_vec(
|
||||
&ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT,
|
||||
&[subnet_id.0.as_ref()],
|
||||
);
|
||||
inc_counter(&GOSSIP_UNAGGREGATED_ATTESTATIONS_TX)
|
||||
}
|
||||
PubsubMessage::AggregateAndProofAttestation(_) => {
|
||||
inc_counter(&GOSSIP_AGGREGATED_ATTESTATIONS_TX)
|
||||
}
|
||||
PubsubMessage::SyncCommitteeMessage(_) => {
|
||||
inc_counter(&GOSSIP_SYNC_COMMITTEE_MESSAGE_TX)
|
||||
}
|
||||
PubsubMessage::SignedContributionAndProof(_) => {
|
||||
inc_counter(&GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_TX)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Inspects a `message` received from the network and updates Prometheus metrics.
|
||||
pub fn expose_receive_metrics<T: EthSpec>(message: &PubsubMessage<T>) {
|
||||
match message {
|
||||
PubsubMessage::BeaconBlock(_) => inc_counter(&GOSSIP_BLOCKS_RX),
|
||||
PubsubMessage::Attestation(_) => inc_counter(&GOSSIP_UNAGGREGATED_ATTESTATIONS_RX),
|
||||
PubsubMessage::AggregateAndProofAttestation(_) => {
|
||||
inc_counter(&GOSSIP_AGGREGATED_ATTESTATIONS_RX)
|
||||
}
|
||||
PubsubMessage::SyncCommitteeMessage(_) => inc_counter(&GOSSIP_SYNC_COMMITTEE_MESSAGE_RX),
|
||||
PubsubMessage::SignedContributionAndProof(_) => {
|
||||
inc_counter(&GOSSIP_SYNC_COMMITTEE_CONTRIBUTION_RX)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_gossip_metrics<T: EthSpec>(
|
||||
gossipsub: &Gossipsub,
|
||||
network_globals: &Arc<NetworkGlobals<T>>,
|
||||
) {
|
||||
// Clear the metrics
|
||||
let _ = PEERS_PER_PROTOCOL.as_ref().map(|gauge| gauge.reset());
|
||||
let _ = PEERS_PER_PROTOCOL.as_ref().map(|gauge| gauge.reset());
|
||||
let _ = MESH_PEERS_PER_MAIN_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
|
||||
let _ = SCORES_BELOW_ZERO_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = MIN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
|
||||
let _ = MEDIAN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
|
||||
let _ = MEAN_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
|
||||
let _ = MAX_SCORES_PER_CLIENT.as_ref().map(|gauge| gauge.reset());
|
||||
|
||||
let _ = BEACON_BLOCK_MESH_PEERS_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
let _ = BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
|
||||
// reset the mesh peers, showing all subnets
|
||||
for subnet_id in 0..T::default_spec().attestation_subnet_count {
|
||||
let _ = get_int_gauge(
|
||||
&MESH_PEERS_PER_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id)],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
|
||||
let _ = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id)],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
|
||||
let _ = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_PEERS_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id)],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
}
|
||||
|
||||
for subnet_id in 0..SYNC_COMMITTEE_SUBNET_COUNT {
|
||||
let _ = get_int_gauge(
|
||||
&MESH_PEERS_PER_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id)],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
|
||||
let _ = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id)],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
|
||||
let _ = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_PEERS_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id)],
|
||||
)
|
||||
.map(|v| v.set(0));
|
||||
}
|
||||
|
||||
// Subnet topics subscribed to
|
||||
// Mesh peers per client
|
||||
for topic_hash in gossipsub.topics() {
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
if let GossipKind::Attestation(subnet_id) = topic.kind() {
|
||||
let _ = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id.into())],
|
||||
)
|
||||
.map(|v| v.set(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Peers per subscribed subnet
|
||||
let mut peers_per_topic: HashMap<TopicHash, usize> = HashMap::new();
|
||||
for (peer_id, topics) in gossipsub.all_peers() {
|
||||
for topic_hash in topics {
|
||||
*peers_per_topic.entry(topic_hash.clone()).or_default() += 1;
|
||||
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
if let Some(v) = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_PEERS_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
|
||||
// average peer scores
|
||||
if let Some(score) = gossipsub.peer_score(peer_id) {
|
||||
if let Some(v) = get_gauge(
|
||||
&AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.add(score)
|
||||
};
|
||||
}
|
||||
}
|
||||
GossipKind::SyncCommitteeMessage(subnet_id) => {
|
||||
if let Some(v) = get_int_gauge(
|
||||
&GOSSIPSUB_SUBSCRIBED_PEERS_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.inc()
|
||||
};
|
||||
|
||||
// average peer scores
|
||||
if let Some(score) = gossipsub.peer_score(peer_id) {
|
||||
if let Some(v) = get_gauge(
|
||||
&AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.add(score)
|
||||
};
|
||||
}
|
||||
}
|
||||
kind => {
|
||||
// main topics
|
||||
if let Some(score) = gossipsub.peer_score(peer_id) {
|
||||
if let Some(v) = get_gauge(
|
||||
&AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC,
|
||||
&[kind.as_ref()],
|
||||
) {
|
||||
v.add(score)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// adjust to average scores by dividing by number of peers
|
||||
for (topic_hash, peers) in peers_per_topic.iter() {
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
// average peer scores
|
||||
if let Some(v) = get_gauge(
|
||||
&AVG_GOSSIPSUB_PEER_SCORE_PER_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.set(v.get() / (*peers as f64))
|
||||
};
|
||||
}
|
||||
GossipKind::SyncCommitteeMessage(subnet_id) => {
|
||||
// average peer scores
|
||||
if let Some(v) = get_gauge(
|
||||
&AVG_GOSSIPSUB_PEER_SCORE_PER_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.set(v.get() / (*peers as f64))
|
||||
};
|
||||
}
|
||||
kind => {
|
||||
// main topics
|
||||
if let Some(v) =
|
||||
get_gauge(&AVG_GOSSIPSUB_PEER_SCORE_PER_MAIN_TOPIC, &[kind.as_ref()])
|
||||
{
|
||||
v.set(v.get() / (*peers as f64))
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mesh peers
|
||||
for topic_hash in gossipsub.topics() {
|
||||
let peers = gossipsub.mesh_peers(topic_hash).count();
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::Attestation(subnet_id) => {
|
||||
if let Some(v) = get_int_gauge(
|
||||
&MESH_PEERS_PER_ATTESTATION_SUBNET_TOPIC,
|
||||
&[subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.set(peers as i64)
|
||||
};
|
||||
}
|
||||
GossipKind::SyncCommitteeMessage(subnet_id) => {
|
||||
if let Some(v) = get_int_gauge(
|
||||
&MESH_PEERS_PER_SYNC_SUBNET_TOPIC,
|
||||
&[sync_subnet_id_to_string(subnet_id.into())],
|
||||
) {
|
||||
v.set(peers as i64)
|
||||
};
|
||||
}
|
||||
kind => {
|
||||
// main topics
|
||||
if let Some(v) = get_int_gauge(&MESH_PEERS_PER_MAIN_TOPIC, &[kind.as_ref()]) {
|
||||
v.set(peers as i64)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// protocol peers
|
||||
let mut peers_per_protocol: HashMap<&'static str, i64> = HashMap::new();
|
||||
for (_peer, protocol) in gossipsub.peer_protocol() {
|
||||
*peers_per_protocol
|
||||
.entry(protocol.as_static_ref())
|
||||
.or_default() += 1;
|
||||
}
|
||||
|
||||
for (protocol, peers) in peers_per_protocol.iter() {
|
||||
if let Some(v) = get_int_gauge(&PEERS_PER_PROTOCOL, &[protocol]) {
|
||||
v.set(*peers)
|
||||
};
|
||||
}
|
||||
|
||||
let mut peer_to_client = HashMap::new();
|
||||
let mut scores_per_client: HashMap<&'static str, Vec<f64>> = HashMap::new();
|
||||
{
|
||||
let peers = network_globals.peers.read();
|
||||
for (peer_id, _) in gossipsub.all_peers() {
|
||||
let client = peers
|
||||
GossipKind::Attestation(_subnet_id) => {}
|
||||
GossipKind::BeaconBlock => {
|
||||
for peer_id in gossipsub.mesh_peers(topic_hash) {
|
||||
let client = network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(peer_id)
|
||||
.map(|peer_info| peer_info.client().kind.as_static())
|
||||
.unwrap_or_else(|| "Unknown");
|
||||
|
||||
peer_to_client.insert(peer_id, client);
|
||||
let score = gossipsub.peer_score(peer_id).unwrap_or(0.0);
|
||||
scores_per_client.entry(client).or_default().push(score);
|
||||
}
|
||||
}
|
||||
|
||||
// mesh peers per client
|
||||
for topic_hash in gossipsub.topics() {
|
||||
if let Ok(topic) = GossipTopic::decode(topic_hash.as_str()) {
|
||||
match topic.kind() {
|
||||
GossipKind::BeaconBlock => {
|
||||
for peer in gossipsub.mesh_peers(topic_hash) {
|
||||
if let Some(client) = peer_to_client.get(peer) {
|
||||
if let Some(v) =
|
||||
get_int_gauge(&BEACON_BLOCK_MESH_PEERS_PER_CLIENT, &[client])
|
||||
{
|
||||
@ -813,10 +362,14 @@ pub fn update_gossip_metrics<T: EthSpec>(
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
GossipKind::BeaconAggregateAndProof => {
|
||||
for peer in gossipsub.mesh_peers(topic_hash) {
|
||||
if let Some(client) = peer_to_client.get(peer) {
|
||||
for peer_id in gossipsub.mesh_peers(topic_hash) {
|
||||
let client = network_globals
|
||||
.peers
|
||||
.read()
|
||||
.peer_info(peer_id)
|
||||
.map(|peer_info| peer_info.client().kind.as_static())
|
||||
.unwrap_or_else(|| "Unknown");
|
||||
if let Some(v) = get_int_gauge(
|
||||
&BEACON_AGGREGATE_AND_PROOF_MESH_PEERS_PER_CLIENT,
|
||||
&[client],
|
||||
@ -825,80 +378,9 @@ pub fn update_gossip_metrics<T: EthSpec>(
|
||||
};
|
||||
}
|
||||
}
|
||||
GossipKind::SyncCommitteeMessage(_subnet_id) => {}
|
||||
_kind => {}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (client, scores) in scores_per_client.into_iter() {
|
||||
let c = &[client];
|
||||
let len = scores.len();
|
||||
if len > 0 {
|
||||
let mut below0 = 0;
|
||||
let mut below_gossip_threshold = 0;
|
||||
let mut below_publish_threshold = 0;
|
||||
let mut below_greylist_threshold = 0;
|
||||
let mut min = f64::INFINITY;
|
||||
let mut sum = 0.0;
|
||||
let mut max = f64::NEG_INFINITY;
|
||||
|
||||
let count = scores.len() as f64;
|
||||
|
||||
for &score in &scores {
|
||||
if score < 0.0 {
|
||||
below0 += 1;
|
||||
}
|
||||
if score < -4000.0 {
|
||||
//TODO not hardcode
|
||||
below_gossip_threshold += 1;
|
||||
}
|
||||
if score < -8000.0 {
|
||||
//TODO not hardcode
|
||||
below_publish_threshold += 1;
|
||||
}
|
||||
if score < -16000.0 {
|
||||
//TODO not hardcode
|
||||
below_greylist_threshold += 1;
|
||||
}
|
||||
if score < min {
|
||||
min = score;
|
||||
}
|
||||
if score > max {
|
||||
max = score;
|
||||
}
|
||||
sum += score;
|
||||
}
|
||||
|
||||
let median = if len == 0 {
|
||||
0.0
|
||||
} else if len % 2 == 0 {
|
||||
(scores[len / 2 - 1] + scores[len / 2]) / 2.0
|
||||
} else {
|
||||
scores[len / 2]
|
||||
};
|
||||
|
||||
set_gauge_entry(&SCORES_BELOW_ZERO_PER_CLIENT, c, below0 as f64 / count);
|
||||
set_gauge_entry(
|
||||
&SCORES_BELOW_GOSSIP_THRESHOLD_PER_CLIENT,
|
||||
c,
|
||||
below_gossip_threshold as f64 / count,
|
||||
);
|
||||
set_gauge_entry(
|
||||
&SCORES_BELOW_PUBLISH_THRESHOLD_PER_CLIENT,
|
||||
c,
|
||||
below_publish_threshold as f64 / count,
|
||||
);
|
||||
set_gauge_entry(
|
||||
&SCORES_BELOW_GREYLIST_THRESHOLD_PER_CLIENT,
|
||||
c,
|
||||
below_greylist_threshold as f64 / count,
|
||||
);
|
||||
|
||||
set_gauge_entry(&MIN_SCORES_PER_CLIENT, c, min);
|
||||
set_gauge_entry(&MEDIAN_SCORES_PER_CLIENT, c, median);
|
||||
set_gauge_entry(&MEAN_SCORES_PER_CLIENT, c, sum / count);
|
||||
set_gauge_entry(&MAX_SCORES_PER_CLIENT, c, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,15 +9,18 @@ use crate::{
|
||||
use beacon_chain::{BeaconChain, BeaconChainError, BeaconChainTypes};
|
||||
use futures::future::OptionFuture;
|
||||
use futures::prelude::*;
|
||||
use lighthouse_network::{
|
||||
open_metrics_client::registry::Registry, MessageAcceptance, Service as LibP2PService,
|
||||
};
|
||||
use lighthouse_network::{
|
||||
rpc::{GoodbyeReason, RPCResponseErrorCode, RequestId},
|
||||
Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, ReportSource, Request, Response, Subnet,
|
||||
Context, Libp2pEvent, PeerAction, PeerRequestId, PubsubMessage, ReportSource, Request,
|
||||
Response, Subnet,
|
||||
};
|
||||
use lighthouse_network::{
|
||||
types::{GossipEncoding, GossipTopic},
|
||||
BehaviourEvent, MessageId, NetworkGlobals, PeerId,
|
||||
};
|
||||
use lighthouse_network::{MessageAcceptance, Service as LibP2PService};
|
||||
use slog::{crit, debug, error, info, o, trace, warn};
|
||||
use std::{net::SocketAddr, pin::Pin, sync::Arc, time::Duration};
|
||||
use store::HotColdDB;
|
||||
@ -32,7 +35,7 @@ use types::{
|
||||
mod tests;
|
||||
|
||||
/// The interval (in seconds) that various network metrics will update.
|
||||
const METRIC_UPDATE_INTERVAL: u64 = 1;
|
||||
const METRIC_UPDATE_INTERVAL: u64 = 5;
|
||||
/// Number of slots before the fork when we should subscribe to the new fork topics.
|
||||
const SUBSCRIBE_DELAY_SLOTS: u64 = 2;
|
||||
/// Delay after a fork where we unsubscribe from pre-fork topics.
|
||||
@ -154,6 +157,7 @@ impl<T: BeaconChainTypes> NetworkService<T> {
|
||||
beacon_chain: Arc<BeaconChain<T>>,
|
||||
config: &NetworkConfig,
|
||||
executor: task_executor::TaskExecutor,
|
||||
gossipsub_registry: Option<&'_ mut Registry>,
|
||||
) -> error::Result<(
|
||||
Arc<NetworkGlobals<T::EthSpec>>,
|
||||
mpsc::UnboundedSender<NetworkMessage<T::EthSpec>>,
|
||||
@ -199,16 +203,18 @@ impl<T: BeaconChainTypes> NetworkService<T> {
|
||||
|
||||
debug!(network_log, "Current fork"; "fork_name" => ?fork_context.current_fork());
|
||||
|
||||
// launch libp2p service
|
||||
let (network_globals, mut libp2p) = LibP2PService::new(
|
||||
executor.clone(),
|
||||
// construct the libp2p service context
|
||||
let service_context = Context {
|
||||
config,
|
||||
enr_fork_id,
|
||||
&network_log,
|
||||
fork_context.clone(),
|
||||
&beacon_chain.spec,
|
||||
)
|
||||
.await?;
|
||||
fork_context: fork_context.clone(),
|
||||
chain_spec: &beacon_chain.spec,
|
||||
gossipsub_registry,
|
||||
};
|
||||
|
||||
// launch libp2p service
|
||||
let (network_globals, mut libp2p) =
|
||||
LibP2PService::new(executor.clone(), service_context, &network_log).await?;
|
||||
|
||||
// Repopulate the DHT with stored ENR's if discovery is not disabled.
|
||||
if !config.disable_discovery {
|
||||
@ -324,21 +330,13 @@ fn spawn_service<T: BeaconChainTypes>(
|
||||
// spawn on the current executor
|
||||
executor.spawn(async move {
|
||||
|
||||
let mut metric_update_counter = 0;
|
||||
loop {
|
||||
// build the futures to check simultaneously
|
||||
tokio::select! {
|
||||
_ = service.metrics_update.tick(), if service.metrics_enabled => {
|
||||
// update various network metrics
|
||||
metric_update_counter +=1;
|
||||
if metric_update_counter % T::EthSpec::default_spec().seconds_per_slot == 0 {
|
||||
// if a slot has occurred, reset the metrics
|
||||
let _ = metrics::ATTESTATIONS_PUBLISHED_PER_SUBNET_PER_SLOT
|
||||
.as_ref()
|
||||
.map(|gauge| gauge.reset());
|
||||
}
|
||||
metrics::update_gossip_metrics::<T::EthSpec>(
|
||||
service.libp2p.swarm.behaviour_mut().gs(),
|
||||
service.libp2p.swarm.behaviour().gs(),
|
||||
&service.network_globals,
|
||||
);
|
||||
// update sync metrics
|
||||
@ -445,7 +443,6 @@ fn spawn_service<T: BeaconChainTypes>(
|
||||
"count" => messages.len(),
|
||||
"topics" => ?topic_kinds
|
||||
);
|
||||
metrics::expose_publish_metrics(&messages);
|
||||
service.libp2p.swarm.behaviour_mut().publish(messages);
|
||||
}
|
||||
NetworkMessage::ReportPeer { peer_id, action, source } => service.libp2p.report_peer(&peer_id, action, source),
|
||||
@ -643,9 +640,6 @@ fn spawn_service<T: BeaconChainTypes>(
|
||||
message,
|
||||
..
|
||||
} => {
|
||||
// Update prometheus metrics.
|
||||
metrics::expose_receive_metrics(&message);
|
||||
|
||||
match message {
|
||||
// attestation information gets processed in the attestation service
|
||||
PubsubMessage::Attestation(ref subnet_and_attestation) => {
|
||||
|
@ -67,7 +67,8 @@ mod tests {
|
||||
// Create a new network service which implicitly gets dropped at the
|
||||
// end of the block.
|
||||
|
||||
let _network_service = NetworkService::start(beacon_chain.clone(), &config, executor)
|
||||
let _network_service =
|
||||
NetworkService::start(beacon_chain.clone(), &config, executor, None)
|
||||
.await
|
||||
.unwrap();
|
||||
drop(signal);
|
||||
|
@ -22,6 +22,6 @@ serde = "1.0.116"
|
||||
serde_derive = "1.0.116"
|
||||
lazy_static = "1.4.0"
|
||||
lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
|
||||
lru = "0.6.0"
|
||||
lru = "0.7.1"
|
||||
sloggers = { version = "2.1.1", features = ["json"] }
|
||||
directory = { path = "../../common/directory" }
|
||||
|
@ -307,6 +307,12 @@ pub fn set_float_gauge(gauge: &Result<Gauge>, value: f64) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_float_gauge_vec(gauge_vec: &Result<GaugeVec>, name: &[&str], value: f64) {
|
||||
if let Some(gauge) = get_gauge(gauge_vec, name) {
|
||||
gauge.set(value);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inc_gauge(gauge: &Result<IntGauge>) {
|
||||
if let Ok(gauge) = gauge {
|
||||
gauge.inc();
|
||||
|
@ -39,7 +39,7 @@ pub fn run<T: EthSpec>(matches: &ArgMatches) -> Result<(), String> {
|
||||
next_fork_version: genesis_fork_version,
|
||||
next_fork_epoch: Epoch::max_value(), // FAR_FUTURE_EPOCH
|
||||
};
|
||||
let enr = build_enr::<T>(&enr_key, &config, enr_fork_id)
|
||||
let enr = build_enr::<T>(&enr_key, &config, &enr_fork_id)
|
||||
.map_err(|e| format!("Unable to create ENR: {:?}", e))?;
|
||||
|
||||
fs::create_dir_all(&output_dir).map_err(|e| format!("Unable to create output-dir: {:?}", e))?;
|
||||
|
@ -14,7 +14,7 @@ lazy_static = "1.4.0"
|
||||
lighthouse_metrics = { path = "../common/lighthouse_metrics" }
|
||||
filesystem = { path = "../common/filesystem" }
|
||||
mdbx = { package = "libmdbx", version = "0.1.0" }
|
||||
lru = "0.6.6"
|
||||
lru = "0.7.1"
|
||||
parking_lot = "0.11.0"
|
||||
rand = "0.7.3"
|
||||
safe_arith = { path = "../consensus/safe_arith" }
|
||||
|
Loading…
Reference in New Issue
Block a user