Configurable monitoring endpoint frequency (#3530)

## Issue Addressed

Closes #3514

## Proposed Changes

- Keep the default monitoring endpoint update period at 60 seconds; operators constrained by a 30k requests/month limit can raise it (for example to 120 seconds).
- Allow configuration of the monitoring endpoint update period using `--monitoring-endpoint-period N`, where `N` is a value in seconds.
This commit is contained in:
Michael Sproul 2022-09-05 08:29:00 +00:00
parent 177aef8f1e
commit 9a7f7f1c1e
9 changed files with 105 additions and 4 deletions

View File

@ -320,6 +320,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
and never provide an untrusted URL.") and never provide an untrusted URL.")
.takes_value(true), .takes_value(true),
) )
.arg(
Arg::with_name("monitoring-endpoint-period")
.long("monitoring-endpoint-period")
.value_name("SECONDS")
.help("Defines how many seconds to wait between each message sent to \
the monitoring-endpoint. Default: 60s")
.requires("monitoring-endpoint")
.takes_value(true),
)
/* /*
* Standard staking flags * Standard staking flags

View File

@ -178,9 +178,13 @@ pub fn get_config<E: EthSpec>(
* Explorer metrics * Explorer metrics
*/ */
if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") { if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") {
let update_period_secs =
clap_utils::parse_optional(cli_args, "monitoring-endpoint-period")?;
client_config.monitoring_api = Some(monitoring_api::Config { client_config.monitoring_api = Some(monitoring_api::Config {
db_path: None, db_path: None,
freezer_db_path: None, freezer_db_path: None,
update_period_secs,
monitoring_endpoint: monitoring_endpoint.to_string(), monitoring_endpoint: monitoring_endpoint.to_string(),
}); });
} }

View File

@ -48,3 +48,39 @@ Check to ensure that the metrics are available on the default port:
```bash ```bash
curl localhost:5064/metrics curl localhost:5064/metrics
``` ```
## Remote Monitoring
Lighthouse has the ability to send a subset of metrics to a remote server for collection. Presently
the main server offering remote monitoring is beaconcha.in. Instructions for setting this up
can be found in beaconcha.in's docs:
- <https://kb.beaconcha.in/beaconcha.in-explorer/mobile-app-less-than-greater-than-beacon-node>
The Lighthouse flag for setting the monitoring URL is `--monitoring-endpoint`.
When sending metrics to a remote server you should be conscious of security:
- Only use a monitoring service that you trust: you are sending detailed information about
your validators and beacon node to this service which could be used to track you.
- Always use an HTTPS URL to prevent the traffic being intercepted in transit.
The specification for the monitoring endpoint can be found here:
- <https://github.com/gobitfly/eth2-client-metrics>
_Note: the similarly named [Validator Monitor](./validator-monitoring.md) feature is entirely
independent of remote metric monitoring_.
### Update Period
You can adjust the frequency at which Lighthouse sends metrics to the remote server using the
`--monitoring-endpoint-period` flag. It takes an integer value in seconds, defaulting to 60
seconds.
```bash
lighthouse bn --monitoring-endpoint-period 60 --monitoring-endpoint "https://url"
```
Increasing the monitoring period can be useful if you are running into rate limits when
posting large amounts of data for multiple nodes.

View File

@ -4,6 +4,9 @@ Lighthouse allows for fine-grained monitoring of specific validators using the "
Generally users will want to use this function to track their own validators, however, it can be Generally users will want to use this function to track their own validators, however, it can be
used for any validator, regardless of who controls it. used for any validator, regardless of who controls it.
_Note: If you are looking for remote metric monitoring, please see the docs on
[Prometheus Metrics](./advanced_metrics.md)_.
## Monitoring is in the Beacon Node ## Monitoring is in the Beacon Node
Lighthouse performs validator monitoring in the Beacon Node (BN) instead of the Validator Client Lighthouse performs validator monitoring in the Beacon Node (BN) instead of the Validator Client

View File

@ -16,7 +16,7 @@ use types::*;
pub use types::ProcessType; pub use types::ProcessType;
/// Duration after which we collect and send metrics to remote endpoint. /// Duration after which we collect and send metrics to remote endpoint.
pub const UPDATE_DURATION: u64 = 60; pub const DEFAULT_UPDATE_DURATION: u64 = 60;
/// Timeout for HTTP requests. /// Timeout for HTTP requests.
pub const TIMEOUT_DURATION: u64 = 5; pub const TIMEOUT_DURATION: u64 = 5;
@ -55,6 +55,8 @@ pub struct Config {
/// Path for the cold database required for fetching beacon db size metrics. /// Path for the cold database required for fetching beacon db size metrics.
/// Note: not relevant for validator and system metrics. /// Note: not relevant for validator and system metrics.
pub freezer_db_path: Option<PathBuf>, pub freezer_db_path: Option<PathBuf>,
/// User-defined update period in seconds.
pub update_period_secs: Option<u64>,
} }
#[derive(Clone)] #[derive(Clone)]
@ -64,6 +66,7 @@ pub struct MonitoringHttpClient {
db_path: Option<PathBuf>, db_path: Option<PathBuf>,
/// Path to the freezer database. /// Path to the freezer database.
freezer_db_path: Option<PathBuf>, freezer_db_path: Option<PathBuf>,
update_period: Duration,
monitoring_endpoint: SensitiveUrl, monitoring_endpoint: SensitiveUrl,
log: slog::Logger, log: slog::Logger,
} }
@ -74,6 +77,9 @@ impl MonitoringHttpClient {
client: reqwest::Client::new(), client: reqwest::Client::new(),
db_path: config.db_path.clone(), db_path: config.db_path.clone(),
freezer_db_path: config.freezer_db_path.clone(), freezer_db_path: config.freezer_db_path.clone(),
update_period: Duration::from_secs(
config.update_period_secs.unwrap_or(DEFAULT_UPDATE_DURATION),
),
monitoring_endpoint: SensitiveUrl::parse(&config.monitoring_endpoint) monitoring_endpoint: SensitiveUrl::parse(&config.monitoring_endpoint)
.map_err(|e| format!("Invalid monitoring endpoint: {:?}", e))?, .map_err(|e| format!("Invalid monitoring endpoint: {:?}", e))?,
log, log,
@ -100,10 +106,15 @@ impl MonitoringHttpClient {
let mut interval = interval_at( let mut interval = interval_at(
// Have some initial delay for the metrics to get initialized // Have some initial delay for the metrics to get initialized
Instant::now() + Duration::from_secs(25), Instant::now() + Duration::from_secs(25),
Duration::from_secs(UPDATE_DURATION), self.update_period,
); );
info!(self.log, "Starting monitoring api"; "endpoint" => %self.monitoring_endpoint); info!(
self.log,
"Starting monitoring API";
"endpoint" => %self.monitoring_endpoint,
"update_period" => format!("{}s", self.update_period.as_secs()),
);
let update_future = async move { let update_future = async move {
loop { loop {

View File

@ -1416,7 +1416,7 @@ fn slasher_backend_override_to_default() {
} }
#[test] #[test]
pub fn malloc_tuning_flag() { fn malloc_tuning_flag() {
CommandLineTest::new() CommandLineTest::new()
.flag("disable-malloc-tuning", None) .flag("disable-malloc-tuning", None)
.run_with_zero_port() .run_with_zero_port()
@ -1439,3 +1439,16 @@ fn ensure_panic_on_failed_launch() {
assert_eq!(slasher_config.chunk_size, 10); assert_eq!(slasher_config.chunk_size, 10);
}); });
} }
/// Checks that the beacon node forwards both `--monitoring-endpoint` and
/// `--monitoring-endpoint-period` into the `monitoring_api` section of its config.
#[test]
fn monitoring_endpoint() {
    let endpoint_url = "http://example:8000";
    let expected_period_secs = Some(30);

    CommandLineTest::new()
        .flag("monitoring-endpoint", Some(endpoint_url))
        .flag("monitoring-endpoint-period", Some("30"))
        .run_with_zero_port()
        .with_config(|config| {
            // Supplying the endpoint flag must populate the monitoring config.
            let monitoring = config.monitoring_api.as_ref().unwrap();
            assert_eq!(monitoring.monitoring_endpoint.as_str(), endpoint_url);
            assert_eq!(monitoring.update_period_secs, expected_period_secs);
        });
}

View File

@ -443,3 +443,16 @@ fn no_strict_fee_recipient_flag() {
.run() .run()
.with_config(|config| assert!(!config.strict_fee_recipient)); .with_config(|config| assert!(!config.strict_fee_recipient));
} }
/// Checks that the validator client forwards both `--monitoring-endpoint` and
/// `--monitoring-endpoint-period` into the `monitoring_api` section of its config.
#[test]
fn monitoring_endpoint() {
    let endpoint_url = "http://example:8000";
    let expected_period_secs = Some(30);

    CommandLineTest::new()
        .flag("monitoring-endpoint", Some(endpoint_url))
        .flag("monitoring-endpoint-period", Some("30"))
        .run()
        .with_config(|config| {
            // Supplying the endpoint flag must populate the monitoring config.
            let monitoring = config.monitoring_api.as_ref().unwrap();
            assert_eq!(monitoring.monitoring_endpoint.as_str(), endpoint_url);
            assert_eq!(monitoring.update_period_secs, expected_period_secs);
        });
}

View File

@ -236,6 +236,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
and never provide an untrusted URL.") and never provide an untrusted URL.")
.takes_value(true), .takes_value(true),
) )
.arg(
Arg::with_name("monitoring-endpoint-period")
.long("monitoring-endpoint-period")
.value_name("SECONDS")
.help("Defines how many seconds to wait between each message sent to \
the monitoring-endpoint. Default: 60s")
.requires("monitoring-endpoint")
.takes_value(true),
)
.arg( .arg(
Arg::with_name("enable-doppelganger-protection") Arg::with_name("enable-doppelganger-protection")
.long("enable-doppelganger-protection") .long("enable-doppelganger-protection")

View File

@ -296,9 +296,12 @@ impl Config {
* Explorer metrics * Explorer metrics
*/ */
if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") { if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") {
let update_period_secs =
clap_utils::parse_optional(cli_args, "monitoring-endpoint-period")?;
config.monitoring_api = Some(monitoring_api::Config { config.monitoring_api = Some(monitoring_api::Config {
db_path: None, db_path: None,
freezer_db_path: None, freezer_db_path: None,
update_period_secs,
monitoring_endpoint: monitoring_endpoint.to_string(), monitoring_endpoint: monitoring_endpoint.to_string(),
}); });
} }