Configurable monitoring endpoint frequency (#3530)

## Issue Addressed

Closes #3514

## Proposed Changes

- The default monitoring endpoint update period stays at 60 seconds; users who need to fit within a 30k requests/month limit can raise it (e.g. to 120 seconds).
- Allow configuration of the monitoring endpoint update period using `--monitoring-endpoint-period N` where `N` is a value in seconds.
This commit is contained in:
Michael Sproul 2022-09-05 08:29:00 +00:00
parent 177aef8f1e
commit 9a7f7f1c1e
9 changed files with 105 additions and 4 deletions

View File

@ -320,6 +320,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
and never provide an untrusted URL.")
.takes_value(true),
)
.arg(
Arg::with_name("monitoring-endpoint-period")
.long("monitoring-endpoint-period")
.value_name("SECONDS")
.help("Defines how many seconds to wait between each message sent to \
the monitoring-endpoint. Default: 60s")
.requires("monitoring-endpoint")
.takes_value(true),
)
/*
* Standard staking flags

View File

@ -178,9 +178,13 @@ pub fn get_config<E: EthSpec>(
* Explorer metrics
*/
if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") {
let update_period_secs =
clap_utils::parse_optional(cli_args, "monitoring-endpoint-period")?;
client_config.monitoring_api = Some(monitoring_api::Config {
db_path: None,
freezer_db_path: None,
update_period_secs,
monitoring_endpoint: monitoring_endpoint.to_string(),
});
}

View File

@ -48,3 +48,39 @@ Check to ensure that the metrics are available on the default port:
```bash
curl localhost:5064/metrics
```
## Remote Monitoring
Lighthouse has the ability to send a subset of metrics to a remote server for collection. Presently
the main server offering remote monitoring is beaconcha.in. Instructions for setting this up
can be found in beaconcha.in's docs:
- <https://kb.beaconcha.in/beaconcha.in-explorer/mobile-app-less-than-greater-than-beacon-node>
The Lighthouse flag for setting the monitoring URL is `--monitoring-endpoint`.
When sending metrics to a remote server you should be conscious of security:
- Only use a monitoring service that you trust: you are sending detailed information about
your validators and beacon node to this service which could be used to track you.
- Always use an HTTPS URL to prevent the traffic being intercepted in transit.
The specification for the monitoring endpoint can be found here:
- <https://github.com/gobitfly/eth2-client-metrics>
_Note: the similarly named [Validator Monitor](./validator-monitoring.md) feature is entirely
independent of remote metric monitoring_.
### Update Period
You can adjust the frequency at which Lighthouse sends metrics to the remote server using the
`--monitoring-endpoint-period` flag. It takes an integer value in seconds, defaulting to 60
seconds.
```bash
lighthouse bn --monitoring-endpoint-period 60 --monitoring-endpoint "https://url"
```
Increasing the period between updates can be useful if you are running into rate limits when
posting large amounts of data for multiple nodes.

View File

@ -4,6 +4,9 @@ Lighthouse allows for fine-grained monitoring of specific validators using the "
Generally users will want to use this function to track their own validators, however, it can be
used for any validator, regardless of who controls it.
_Note: If you are looking for remote metric monitoring, please see the docs on
[Prometheus Metrics](./advanced_metrics.md)_.
## Monitoring is in the Beacon Node
Lighthouse performs validator monitoring in the Beacon Node (BN) instead of the Validator Client

View File

@ -16,7 +16,7 @@ use types::*;
pub use types::ProcessType;
/// Default duration, in seconds, after which we collect and send metrics to the remote endpoint.
pub const UPDATE_DURATION: u64 = 60;
pub const DEFAULT_UPDATE_DURATION: u64 = 60;
/// Timeout for HTTP requests.
pub const TIMEOUT_DURATION: u64 = 5;
@ -55,6 +55,8 @@ pub struct Config {
/// Path for the cold database required for fetching beacon db size metrics.
/// Note: not relevant for validator and system metrics.
pub freezer_db_path: Option<PathBuf>,
/// User-defined update period in seconds.
pub update_period_secs: Option<u64>,
}
#[derive(Clone)]
@ -64,6 +66,7 @@ pub struct MonitoringHttpClient {
db_path: Option<PathBuf>,
/// Path to the freezer database.
freezer_db_path: Option<PathBuf>,
update_period: Duration,
monitoring_endpoint: SensitiveUrl,
log: slog::Logger,
}
@ -74,6 +77,9 @@ impl MonitoringHttpClient {
client: reqwest::Client::new(),
db_path: config.db_path.clone(),
freezer_db_path: config.freezer_db_path.clone(),
update_period: Duration::from_secs(
config.update_period_secs.unwrap_or(DEFAULT_UPDATE_DURATION),
),
monitoring_endpoint: SensitiveUrl::parse(&config.monitoring_endpoint)
.map_err(|e| format!("Invalid monitoring endpoint: {:?}", e))?,
log,
@ -100,10 +106,15 @@ impl MonitoringHttpClient {
let mut interval = interval_at(
// Have some initial delay for the metrics to get initialized
Instant::now() + Duration::from_secs(25),
Duration::from_secs(UPDATE_DURATION),
self.update_period,
);
info!(self.log, "Starting monitoring api"; "endpoint" => %self.monitoring_endpoint);
info!(
self.log,
"Starting monitoring API";
"endpoint" => %self.monitoring_endpoint,
"update_period" => format!("{}s", self.update_period.as_secs()),
);
let update_future = async move {
loop {

View File

@ -1416,7 +1416,7 @@ fn slasher_backend_override_to_default() {
}
#[test]
pub fn malloc_tuning_flag() {
fn malloc_tuning_flag() {
CommandLineTest::new()
.flag("disable-malloc-tuning", None)
.run_with_zero_port()
@ -1439,3 +1439,16 @@ fn ensure_panic_on_failed_launch() {
assert_eq!(slasher_config.chunk_size, 10);
});
}
/// Passes both `--monitoring-endpoint` and `--monitoring-endpoint-period` on the command
/// line and checks that the resulting monitoring API config carries the same endpoint
/// string and the parsed period in seconds.
#[test]
fn monitoring_endpoint() {
    const ENDPOINT_URL: &str = "http://example:8000";
    const PERIOD_ARG: &str = "30";
    const PERIOD_SECS: u64 = 30;

    CommandLineTest::new()
        .flag("monitoring-endpoint", Some(ENDPOINT_URL))
        .flag("monitoring-endpoint-period", Some(PERIOD_ARG))
        .run_with_zero_port()
        .with_config(|config| {
            // The monitoring config must be present once an endpoint has been supplied.
            let monitoring = config.monitoring_api.as_ref().unwrap();
            assert_eq!(monitoring.monitoring_endpoint.as_str(), ENDPOINT_URL);
            assert_eq!(monitoring.update_period_secs, Some(PERIOD_SECS));
        });
}

View File

@ -443,3 +443,16 @@ fn no_strict_fee_recipient_flag() {
.run()
.with_config(|config| assert!(!config.strict_fee_recipient));
}
/// Supplies `--monitoring-endpoint` together with `--monitoring-endpoint-period` and
/// verifies that both values end up in the generated monitoring API config.
#[test]
fn monitoring_endpoint() {
    const ENDPOINT_URL: &str = "http://example:8000";
    const PERIOD_ARG: &str = "30";
    const PERIOD_SECS: u64 = 30;

    CommandLineTest::new()
        .flag("monitoring-endpoint", Some(ENDPOINT_URL))
        .flag("monitoring-endpoint-period", Some(PERIOD_ARG))
        .run()
        .with_config(|config| {
            // An endpoint was given, so the monitoring config is expected to be populated.
            let monitoring = config.monitoring_api.as_ref().unwrap();
            assert_eq!(monitoring.monitoring_endpoint.as_str(), ENDPOINT_URL);
            assert_eq!(monitoring.update_period_secs, Some(PERIOD_SECS));
        });
}

View File

@ -236,6 +236,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
and never provide an untrusted URL.")
.takes_value(true),
)
.arg(
Arg::with_name("monitoring-endpoint-period")
.long("monitoring-endpoint-period")
.value_name("SECONDS")
.help("Defines how many seconds to wait between each message sent to \
the monitoring-endpoint. Default: 60s")
.requires("monitoring-endpoint")
.takes_value(true),
)
.arg(
Arg::with_name("enable-doppelganger-protection")
.long("enable-doppelganger-protection")

View File

@ -296,9 +296,12 @@ impl Config {
* Explorer metrics
*/
if let Some(monitoring_endpoint) = cli_args.value_of("monitoring-endpoint") {
let update_period_secs =
clap_utils::parse_optional(cli_args, "monitoring-endpoint-period")?;
config.monitoring_api = Some(monitoring_api::Config {
db_path: None,
freezer_db_path: None,
update_period_secs,
monitoring_endpoint: monitoring_endpoint.to_string(),
});
}