From f3380c00b8fed78ead51fa1c8302bb68942fd806 Mon Sep 17 00:00:00 2001 From: Age Manning Date: Fri, 19 Jun 2020 15:36:03 +1000 Subject: [PATCH] Discovery metrics (#1276) * Silky smooth squash * Add discovery metrics * Fix gauge metric, increase discv5 sessions * Formatting --- beacon_node/eth2_libp2p/src/config.rs | 2 +- beacon_node/eth2_libp2p/src/lib.rs | 1 + beacon_node/eth2_libp2p/src/metrics.rs | 33 ++++++++++++++++++- .../src/peer_manager/discovery/mod.rs | 13 +++++--- beacon_node/rest_api/src/metrics.rs | 1 + common/lighthouse_metrics/src/lib.rs | 28 ++++++++++++---- 6 files changed, 65 insertions(+), 13 deletions(-) diff --git a/beacon_node/eth2_libp2p/src/config.rs b/beacon_node/eth2_libp2p/src/config.rs index 08c451cbe..8586d8caa 100644 --- a/beacon_node/eth2_libp2p/src/config.rs +++ b/beacon_node/eth2_libp2p/src/config.rs @@ -110,7 +110,7 @@ impl Default for Config { // discv5 configuration let discv5_config = Discv5ConfigBuilder::new() .enable_packet_filter() - .session_cache_capacity(100) + .session_cache_capacity(1000) .request_timeout(Duration::from_secs(4)) .request_retries(2) .enr_peer_update_min(2) // prevents NAT's should be raised for mainnet diff --git a/beacon_node/eth2_libp2p/src/lib.rs b/beacon_node/eth2_libp2p/src/lib.rs index 24a66e0a6..a35396c6f 100644 --- a/beacon_node/eth2_libp2p/src/lib.rs +++ b/beacon_node/eth2_libp2p/src/lib.rs @@ -20,6 +20,7 @@ pub use discv5; pub use libp2p::gossipsub::{MessageId, Topic, TopicHash}; pub use libp2p::{core::ConnectedPoint, PeerId, Swarm}; pub use libp2p::{multiaddr, Multiaddr}; +pub use metrics::scrape_discovery_metrics; pub use peer_manager::discovery; pub use peer_manager::{ client::Client, diff --git a/beacon_node/eth2_libp2p/src/metrics.rs b/beacon_node/eth2_libp2p/src/metrics.rs index 52caf9e92..d1e19b653 100644 --- a/beacon_node/eth2_libp2p/src/metrics.rs +++ b/beacon_node/eth2_libp2p/src/metrics.rs @@ -21,8 +21,39 @@ lazy_static! { "discovery_queue_size", "The number of discovery queries awaiting execution" ); - pub static ref DISCOVERY_REQS: Result = try_create_int_gauge( + pub static ref DISCOVERY_REQS: Result = try_create_float_gauge( "discovery_requests", "The number of unsolicited discovery requests per second" ); + pub static ref DISCOVERY_SESSIONS: Result = try_create_int_gauge( + "discovery_sessions", + "The number of active discovery sessions with peers" + ); + pub static ref DISCOVERY_REQS_IP: Result = try_create_float_gauge_vec( + "discovery_reqs_per_ip", + "Unsolicited discovery requests per ip per second", + &["Addresses"] + ); +} + +pub fn scrape_discovery_metrics() { + let metrics = discv5::metrics::Metrics::from(discv5::Discv5::raw_metrics()); + + set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second); + + set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64); + + let process_gauge_vec = |gauge: &Result, metrics: discv5::metrics::Metrics| { + if let Ok(gauge_vec) = gauge { + gauge_vec.reset(); + for (ip, value) in metrics.requests_per_ip_per_second.iter() { + if let Ok(metric) = gauge_vec.get_metric_with_label_values(&[&format!("{:?}", ip)]) + { + metric.set(*value); + } + } + } + }; + + process_gauge_vec(&DISCOVERY_REQS_IP, metrics); } diff --git a/beacon_node/eth2_libp2p/src/peer_manager/discovery/mod.rs b/beacon_node/eth2_libp2p/src/peer_manager/discovery/mod.rs index 2e813b536..9a0607b81 100644 --- a/beacon_node/eth2_libp2p/src/peer_manager/discovery/mod.rs +++ b/beacon_node/eth2_libp2p/src/peer_manager/discovery/mod.rs @@ -12,9 +12,8 @@ use crate::{error, Enr, NetworkConfig, NetworkGlobals}; use discv5::{enr::NodeId, Discv5, Discv5Event}; use enr::{BITFIELD_ENR_KEY, ETH2_ENR_KEY}; use futures::prelude::*; -use libp2p::core::PeerId; -// use libp2p::multiaddr::Protocol; use futures::stream::FuturesUnordered; +use libp2p::core::PeerId; use lru::LruCache; use slog::{crit, debug, info, trace, warn}; use ssz::{Decode, Encode}; @@ -197,11 +196,11 @@ impl Discovery { }); } - // start the discv5 service. + // Start the discv5 service. discv5.start(listen_socket); debug!(log, "Discovery service started"); - // obtain the event stream + // Obtain the event stream let event_stream = EventStream::Awaiting(Box::pin(discv5.event_stream())); Ok(Self { @@ -224,6 +223,10 @@ impl Discovery { /// This adds a new `FindPeers` query to the queue if one doesn't already exist. pub fn discover_peers(&mut self) { + // If we are in the process of a query, don't bother queuing a new one. + if self.find_peer_active { + return; + } // If there is not already a find peer's query queued, add one let query = QueryType::FindPeers; if !self.queued_queries.contains(&query) { @@ -426,6 +429,8 @@ impl Discovery { None => {} // Queue is empty } } + // Update the queue metric + metrics::set_gauge(&metrics::DISCOVERY_QUEUE, self.queued_queries.len() as i64); } // Returns a boolean indicating if we are currently processing the maximum number of diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index ac6775623..87b4f6285 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -104,6 +104,7 @@ pub fn get_prometheus( slot_clock::scrape_for_metrics::(&beacon_chain.slot_clock); store::scrape_for_metrics(&db_path, &freezer_db_path); beacon_chain::scrape_for_metrics(&beacon_chain); + eth2_libp2p::scrape_discovery_metrics(); // This will silently fail if we are unable to observe the health. This is desired behaviour // since we don't support `Health` for all platforms. diff --git a/common/lighthouse_metrics/src/lib.rs b/common/lighthouse_metrics/src/lib.rs index c6314bdb4..a0f59c54b 100644 --- a/common/lighthouse_metrics/src/lib.rs +++ b/common/lighthouse_metrics/src/lib.rs @@ -57,7 +57,8 @@ use prometheus::{HistogramOpts, HistogramTimer, Opts}; pub use prometheus::{ - Encoder, Gauge, Histogram, HistogramVec, IntCounter, IntGauge, IntGaugeVec, Result, TextEncoder, + Encoder, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntGauge, IntGaugeVec, Result, + TextEncoder, }; /// Collect all the metrics for reporting. @@ -127,6 +128,19 @@ pub fn try_create_int_gauge_vec( Ok(counter_vec) } +/// Attempts to crate a `GaugeVec`, returning `Err` if the registry does not accept the gauge +/// (potentially due to naming conflict). +pub fn try_create_float_gauge_vec( + name: &str, + help: &str, + label_names: &[&str], +) -> Result { + let opts = Opts::new(name, help); + let counter_vec = GaugeVec::new(opts, label_names)?; + prometheus::register(Box::new(counter_vec.clone()))?; + Ok(counter_vec) +} + pub fn get_int_gauge(int_gauge_vec: &Result, name: &[&str]) -> Option { if let Ok(int_gauge_vec) = int_gauge_vec { Some(int_gauge_vec.get_metric_with_label_values(name).ok()?) @@ -177,6 +191,12 @@ pub fn set_gauge(gauge: &Result, value: i64) { } } +pub fn set_float_gauge(gauge: &Result, value: f64) { + if let Ok(gauge) = gauge { + gauge.set(value); + } +} + pub fn inc_gauge(gauge: &Result) { if let Ok(gauge) = gauge { gauge.inc(); @@ -195,12 +215,6 @@ pub fn maybe_set_gauge(gauge: &Result, value_opt: Option) { } } -pub fn set_float_gauge(gauge: &Result, value: f64) { - if let Ok(gauge) = gauge { - gauge.set(value); - } -} - pub fn maybe_set_float_gauge(gauge: &Result, value_opt: Option) { if let Some(value) = value_opt { set_float_gauge(gauge, value)