From 98815516a102fd0b3f7c31c516a183282e4f3a30 Mon Sep 17 00:00:00 2001 From: tim gretler Date: Tue, 13 Sep 2022 01:57:44 +0000 Subject: [PATCH] Support histogram buckets (#3391) ## Issue Addressed #3285 ## Proposed Changes Adds support for specifying histogram with buckets and adds new metric buckets for metrics mentioned in issue. ## Additional Info Need some help for the buckets. Co-authored-by: Michael Sproul --- beacon_node/beacon_chain/src/metrics.rs | 29 +++++++++---- beacon_node/network/src/metrics.rs | 9 +++- common/lighthouse_metrics/src/lib.rs | 55 +++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 15 deletions(-) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 8030bfa71..cad77a378 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -122,14 +122,17 @@ lazy_static! { /* * Block Statistics */ - pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = try_create_histogram( + pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = try_create_histogram_with_buckets( "beacon_operations_per_block_attestation_total", - "Number of attestations in a block" + "Number of attestations in a block", + // Full block is 128. + Ok(vec![0_f64, 1_f64, 3_f64, 15_f64, 31_f64, 63_f64, 127_f64, 255_f64]) ); - pub static ref BLOCK_SIZE: Result = try_create_histogram( + pub static ref BLOCK_SIZE: Result = try_create_histogram_with_buckets( "beacon_block_total_size", - "Size of a signed beacon block" + "Size of a signed beacon block", + linear_buckets(5120_f64,5120_f64,10) ); /* @@ -775,21 +778,29 @@ lazy_static! { /* * Block Delay Metrics */ - pub static ref BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME: Result = try_create_histogram( + pub static ref BEACON_BLOCK_OBSERVED_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_block_observed_slot_start_delay_time", "Duration between the start of the block's slot and the time the block was observed.", + // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] + decimal_buckets(-1,2) ); - pub static ref BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME: Result = try_create_histogram( + pub static ref BEACON_BLOCK_IMPORTED_OBSERVED_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_block_imported_observed_delay_time", "Duration between the time the block was observed and the time when it was imported.", + // [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5] + decimal_buckets(-2,0) ); - pub static ref BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME: Result = try_create_histogram( + pub static ref BEACON_BLOCK_HEAD_IMPORTED_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_block_head_imported_delay_time", "Duration between the time the block was imported and the time when it was set as head.", - ); - pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME: Result = try_create_histogram( + // [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5] + decimal_buckets(-2,-1) + ); + pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_block_head_slot_start_delay_time", "Duration between the start of the block's slot and the time when it was set as head.", + // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] + decimal_buckets(-1,2) ); pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL: Result = try_create_int_counter( "beacon_block_head_slot_start_delay_exceeded_total", diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index defb9c600..b4e7a3bac 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -311,13 +311,18 @@ lazy_static! { /* * Block Delay Metrics */ - pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result = try_create_histogram( + pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_block_gossip_propagation_verification_delay_time", "Duration between when the block is received and when it is verified for propagation.", + // [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5] + decimal_buckets(-3,-1) ); - pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result = try_create_histogram( + pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result = try_create_histogram_with_buckets( "beacon_block_gossip_slot_start_delay_time", "Duration between when the block is received and the start of the slot it belongs to.", + // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] + decimal_buckets(-1,2) + ); pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result = try_create_int_counter( "beacon_block_gossip_arrived_late_total", diff --git a/common/lighthouse_metrics/src/lib.rs b/common/lighthouse_metrics/src/lib.rs index 98973de1a..5d25bb313 100644 --- a/common/lighthouse_metrics/src/lib.rs +++ b/common/lighthouse_metrics/src/lib.rs @@ -54,14 +54,15 @@ //! } //! ``` -use prometheus::{HistogramOpts, Opts}; +use prometheus::{Error, HistogramOpts, Opts}; use std::time::Duration; use prometheus::core::{Atomic, GenericGauge, GenericGaugeVec}; pub use prometheus::{ + exponential_buckets, linear_buckets, proto::{Metric, MetricFamily, MetricType}, Encoder, Gauge, GaugeVec, Histogram, HistogramTimer, HistogramVec, IntCounter, IntCounterVec, - IntGauge, IntGaugeVec, Result, TextEncoder, + IntGauge, IntGaugeVec, Result, TextEncoder, DEFAULT_BUCKETS, }; /// Collect all the metrics for reporting. @@ -99,7 +100,17 @@ pub fn try_create_float_gauge(name: &str, help: &str) -> Result { /// Attempts to create a `Histogram`, returning `Err` if the registry does not accept the counter /// (potentially due to naming conflict). pub fn try_create_histogram(name: &str, help: &str) -> Result { - let opts = HistogramOpts::new(name, help); + try_create_histogram_with_buckets(name, help, Ok(DEFAULT_BUCKETS.to_vec())) +} + +/// Attempts to create a `Histogram` with specified buckets, returning `Err` if the registry does not accept the counter +/// (potentially due to naming conflict) or no valid buckets are provided. +pub fn try_create_histogram_with_buckets( + name: &str, + help: &str, + buckets: Result>, +) -> Result { + let opts = HistogramOpts::new(name, help).buckets(buckets?); let histogram = Histogram::with_opts(opts)?; prometheus::register(Box::new(histogram.clone()))?; Ok(histogram) @@ -112,7 +123,18 @@ pub fn try_create_histogram_vec( help: &str, label_names: &[&str], ) -> Result { - let opts = HistogramOpts::new(name, help); + try_create_histogram_vec_with_buckets(name, help, Ok(DEFAULT_BUCKETS.to_vec()), label_names) +} + +/// Attempts to create a `HistogramVec` with specified buckets, returning `Err` if the registry does not accept the counter +/// (potentially due to naming conflict) or no valid buckets are provided. +pub fn try_create_histogram_vec_with_buckets( + name: &str, + help: &str, + buckets: Result>, + label_names: &[&str], +) -> Result { + let opts = HistogramOpts::new(name, help).buckets(buckets?); let histogram_vec = HistogramVec::new(opts, label_names)?; prometheus::register(Box::new(histogram_vec.clone()))?; Ok(histogram_vec) @@ -357,3 +379,28 @@ fn duration_to_f64(duration: Duration) -> f64 { let nanos = f64::from(duration.subsec_nanos()) / 1e9; duration.as_secs() as f64 + nanos } + +/// Create buckets using divisors of 10 multiplied by powers of 10, e.g., +/// […, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, …] +/// +/// The buckets go from `10^min_power` to `5 × 10^max_power`, inclusively. +/// The total number of buckets is `3 * (max_power - min_power + 1)`. +/// +/// assert_eq!(vec![0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0], decimal_buckets(-1, 1)); +/// assert_eq!(vec![1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0, 500.0], decimal_buckets(0, 2)); +pub fn decimal_buckets(min_power: i32, max_power: i32) -> Result> { + if max_power < min_power { + return Err(Error::Msg(format!( + "decimal_buckets min_power needs to be <= max_power, given {} and {}", + min_power, max_power + ))); + } + + let mut buckets = Vec::with_capacity(3 * (max_power - min_power + 1) as usize); + for n in min_power..=max_power { + for m in &[1f64, 2f64, 5f64] { + buckets.push(m * 10f64.powi(n)) + } + } + Ok(buckets) +}