From 48733917be2a59ba87b01a0bc4678347ebb96f4f Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 12:12:19 +1000 Subject: [PATCH 01/26] Begin metrics refactor --- beacon_node/beacon_chain/Cargo.toml | 2 +- beacon_node/beacon_chain/src/beacon_chain.rs | 7 +++++++ beacon_node/beacon_chain/src/lib.rs | 6 ++++++ beacon_node/beacon_chain/src/metrics.rs | 12 ++++++++++++ beacon_node/rest_api/Cargo.toml | 1 + beacon_node/rest_api/src/lib.rs | 2 ++ beacon_node/rest_api/src/metrics.rs | 17 +++++++++++++++++ 7 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 beacon_node/rest_api/src/metrics.rs diff --git a/beacon_node/beacon_chain/Cargo.toml b/beacon_node/beacon_chain/Cargo.toml index 778224a3d..43e7614b6 100644 --- a/beacon_node/beacon_chain/Cargo.toml +++ b/beacon_node/beacon_chain/Cargo.toml @@ -17,6 +17,7 @@ sloggers = { version = "^0.3" } slot_clock = { path = "../../eth2/utils/slot_clock" } eth2_ssz = "0.1" eth2_ssz_derive = "0.1" +lazy_static = "1.3.0" state_processing = { path = "../../eth2/state_processing" } tree_hash = "0.1" types = { path = "../../eth2/types" } @@ -24,4 +25,3 @@ lmd_ghost = { path = "../../eth2/lmd_ghost" } [dev-dependencies] rand = "0.5.5" -lazy_static = "1.3.0" diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 9ccf59589..e31844d58 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2,6 +2,7 @@ use crate::checkpoint::CheckPoint; use crate::errors::{BeaconChainError as Error, BlockProductionError}; use crate::fork_choice::{Error as ForkChoiceError, ForkChoice}; use crate::iter::{ReverseBlockRootIterator, ReverseStateRootIterator}; +use crate::metrics; use crate::metrics::Metrics; use crate::persisted_beacon_chain::{PersistedBeaconChain, BEACON_CHAIN_DB_KEY}; use lmd_ghost::LmdGhost; @@ -848,6 +849,10 @@ impl BeaconChain { return Ok(BlockProcessingOutcome::BlockIsAlreadyKnown); } + // Records the 
time taken to load the block and state from the database during block + // processing. + let db_read_timer = metrics::BLOCK_PROCESSING_DB_READ.start_timer(); + // Load the blocks parent block from the database, returning invalid if that block is not // found. let parent_block: BeaconBlock = match self.store.get(&block.parent_root)? { @@ -867,6 +872,8 @@ impl BeaconChain { .get(&parent_state_root)? .ok_or_else(|| Error::DBInconsistent(format!("Missing state {}", parent_state_root)))?; + db_read_timer.observe_duration(); + // Transition the parent state to the block slot. let mut state: BeaconState = parent_state; for _ in state.slot.as_u64()..block.slot.as_u64() { diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index 3188760a4..e24534a2e 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -1,3 +1,8 @@ +#[macro_use] +extern crate prometheus; +#[macro_use] +extern crate lazy_static; + mod beacon_chain; mod checkpoint; mod errors; @@ -13,6 +18,7 @@ pub use self::beacon_chain::{ pub use self::checkpoint::CheckPoint; pub use self::errors::{BeaconChainError, BlockProductionError}; pub use lmd_ghost; +pub use metrics::gather_metrics; pub use parking_lot; pub use slot_clock; pub use state_processing::per_block_processing::errors::{ diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index fa1718ebf..fcb564e32 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,6 +1,18 @@ pub use prometheus::Error; use prometheus::{Histogram, HistogramOpts, IntCounter, Opts, Registry}; +lazy_static! 
{ + pub static ref BLOCK_PROCESSING_DB_READ: Histogram = register_histogram!( + "block_processing_db_read_times", + "Time spent loading block and state from DB" + ) + .unwrap(); +} + +pub fn gather_metrics() -> Vec { + prometheus::gather() +} + pub struct Metrics { pub block_processing_requests: IntCounter, pub block_processing_successes: IntCounter, diff --git a/beacon_node/rest_api/Cargo.toml b/beacon_node/rest_api/Cargo.toml index fb6cb8413..821d6c0ea 100644 --- a/beacon_node/rest_api/Cargo.toml +++ b/beacon_node/rest_api/Cargo.toml @@ -18,6 +18,7 @@ state_processing = { path = "../../eth2/state_processing" } types = { path = "../../eth2/types" } clap = "2.32.0" http = "^0.1.17" +prometheus = { version = "^0.6", features = ["process"] } hyper = "0.12.32" futures = "0.1" exit-future = "0.1.3" diff --git a/beacon_node/rest_api/src/lib.rs b/beacon_node/rest_api/src/lib.rs index a94a8cdf4..7dc0df578 100644 --- a/beacon_node/rest_api/src/lib.rs +++ b/beacon_node/rest_api/src/lib.rs @@ -3,6 +3,7 @@ extern crate hyper; mod beacon; mod config; mod helpers; +mod metrics; mod node; mod url_query; @@ -103,6 +104,7 @@ pub fn start_server( let result = match (req.method(), path.as_ref()) { (&Method::GET, "/beacon/state") => beacon::get_state::(req), (&Method::GET, "/beacon/state_root") => beacon::get_state_root::(req), + (&Method::GET, "/metrics") => metrics::get_prometheus(req), (&Method::GET, "/node/version") => node::get_version(req), (&Method::GET, "/node/genesis_time") => node::get_genesis_time::(req), _ => Err(ApiError::MethodNotAllowed(path.clone())), diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs new file mode 100644 index 000000000..1ecdf8b68 --- /dev/null +++ b/beacon_node/rest_api/src/metrics.rs @@ -0,0 +1,17 @@ +use crate::{success_response, ApiError, ApiResult}; +use hyper::{Body, Request}; +use prometheus::{Encoder, TextEncoder}; + +/// Returns the full set of Prometheus metrics for the Beacon Node application. 
+pub fn get_prometheus(_req: Request) -> ApiResult { + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + + encoder + .encode(&beacon_chain::gather_metrics(), &mut buffer) + .unwrap(); + + String::from_utf8(buffer) + .map(|string| success_response(Body::from(string))) + .map_err(|e| ApiError::ServerError(format!("Failed to encode prometheus info: {:?}", e))) +} From 9995b390b5077ec8c8f92e3fe741590357bad05d Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 14:11:13 +1000 Subject: [PATCH 02/26] Move beacon_chain to new metrics structure. --- beacon_node/beacon_chain/src/beacon_chain.rs | 41 ++-- beacon_node/beacon_chain/src/metrics.rs | 242 ++++++++----------- beacon_node/client/src/lib.rs | 5 - 3 files changed, 117 insertions(+), 171 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index e31844d58..df9523624 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -3,7 +3,6 @@ use crate::errors::{BeaconChainError as Error, BlockProductionError}; use crate::fork_choice::{Error as ForkChoiceError, ForkChoice}; use crate::iter::{ReverseBlockRootIterator, ReverseStateRootIterator}; use crate::metrics; -use crate::metrics::Metrics; use crate::persisted_beacon_chain::{PersistedBeaconChain, BEACON_CHAIN_DB_KEY}; use lmd_ghost::LmdGhost; use log::trace; @@ -107,8 +106,6 @@ pub struct BeaconChain { /// A state-machine that is updated with information from the network and chooses a canonical /// head block. pub fork_choice: ForkChoice, - /// Stores metrics about this `BeaconChain`. - pub metrics: Metrics, /// Logging to CLI, etc. 
log: Logger, } @@ -158,7 +155,6 @@ impl BeaconChain { canonical_head, genesis_block_root, fork_choice: ForkChoice::new(store.clone(), &genesis_block, genesis_block_root), - metrics: Metrics::new()?, store, log, }) @@ -196,7 +192,6 @@ impl BeaconChain { canonical_head: RwLock::new(p.canonical_head), state: RwLock::new(p.state), genesis_block_root: p.genesis_block_root, - metrics: Metrics::new()?, store, log, })) @@ -473,8 +468,8 @@ impl BeaconChain { state: &BeaconState, ) -> Result { // Collect some metrics. - self.metrics.attestation_production_requests.inc(); - let timer = self.metrics.attestation_production_times.start_timer(); + metrics::ATTESTATION_PRODUCTION_REQUESTS.inc(); + let timer = metrics::ATTESTATION_PRODUCTION_TIMES.start_timer(); let slots_per_epoch = T::EthSpec::slots_per_epoch(); let current_epoch_start_slot = state.current_epoch().start_slot(slots_per_epoch); @@ -521,7 +516,7 @@ impl BeaconChain { }; // Collect some metrics. - self.metrics.attestation_production_successes.inc(); + metrics::ATTESTATION_PRODUCTION_SUCCESSES.inc(); timer.observe_duration(); Ok(AttestationData { @@ -708,8 +703,8 @@ impl BeaconChain { state: &BeaconState, block: &BeaconBlock, ) -> Result { - self.metrics.attestation_processing_requests.inc(); - let timer = self.metrics.attestation_processing_times.start_timer(); + metrics::ATTESTATION_PROCESSING_REQUESTS.inc(); + let timer = metrics::ATTESTATION_PROCESSING_TIMES.start_timer(); // Find the highest between: // @@ -754,7 +749,7 @@ impl BeaconChain { .insert_attestation(attestation, state, &self.spec)?; // Update the metrics. 
- self.metrics.attestation_processing_successes.inc(); + metrics::ATTESTATION_PROCESSING_SUCCESSES.inc(); Ok(AttestationProcessingOutcome::Processed) }; @@ -810,8 +805,8 @@ impl BeaconChain { &self, block: BeaconBlock, ) -> Result { - self.metrics.block_processing_requests.inc(); - let timer = self.metrics.block_processing_times.start_timer(); + metrics::BLOCK_PROCESSING_REQUESTS.inc(); + let timer = metrics::BLOCK_PROCESSING_TIMES.start_timer(); let finalized_slot = self .state @@ -926,10 +921,8 @@ impl BeaconChain { ) }; - self.metrics.block_processing_successes.inc(); - self.metrics - .operations_per_block_attestation - .observe(block.body.attestations.len() as f64); + metrics::BLOCK_PROCESSING_SUCCESSES.inc(); + metrics::OPERATIONS_PER_BLOCK_ATTESTATION.observe(block.body.attestations.len() as f64); timer.observe_duration(); Ok(BlockProcessingOutcome::Processed { block_root }) @@ -965,8 +958,8 @@ impl BeaconChain { produce_at_slot: Slot, randao_reveal: Signature, ) -> Result<(BeaconBlock, BeaconState), BlockProductionError> { - self.metrics.block_production_requests.inc(); - let timer = self.metrics.block_production_times.start_timer(); + metrics::BLOCK_PRODUCTION_REQUESTS.inc(); + let timer = metrics::BLOCK_PRODUCTION_TIMES.start_timer(); // If required, transition the new state to the present slot. while state.slot < produce_at_slot { @@ -1018,7 +1011,7 @@ impl BeaconChain { block.state_root = state_root; - self.metrics.block_production_successes.inc(); + metrics::BLOCK_PRODUCTION_SUCCESSES.inc(); timer.observe_duration(); Ok((block, state)) @@ -1026,10 +1019,10 @@ impl BeaconChain { /// Execute the fork choice algorithm and enthrone the result as the canonical head. pub fn fork_choice(&self) -> Result<(), Error> { - self.metrics.fork_choice_requests.inc(); + metrics::FORK_CHOICE_REQUESTS.inc(); // Start fork choice metrics timer. 
- let timer = self.metrics.fork_choice_times.start_timer(); + let timer = metrics::FORK_CHOICE_TIMES.start_timer(); // Determine the root of the block that is the head of the chain. let beacon_block_root = self.fork_choice.find_head(&self)?; @@ -1039,7 +1032,7 @@ impl BeaconChain { // If a new head was chosen. if beacon_block_root != self.head().beacon_block_root { - self.metrics.fork_choice_changed_head.inc(); + metrics::FORK_CHOICE_CHANGED_HEAD.inc(); let beacon_block: BeaconBlock = self .store @@ -1057,7 +1050,7 @@ impl BeaconChain { // If we switched to a new chain (instead of building atop the present chain). if self.head().beacon_block_root != beacon_block.parent_root { - self.metrics.fork_choice_reorg_count.inc(); + metrics::FORK_CHOICE_REORG_COUNT.inc(); warn!( self.log, "Beacon chain re-org"; diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index fcb564e32..8b8307e93 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,155 +1,113 @@ pub use prometheus::Error; -use prometheus::{Histogram, HistogramOpts, IntCounter, Opts, Registry}; +use prometheus::{Histogram, IntCounter}; lazy_static! 
{ + /* + * Block Processing + */ pub static ref BLOCK_PROCESSING_DB_READ: Histogram = register_histogram!( "block_processing_db_read_times", "Time spent loading block and state from DB" ) .unwrap(); + pub static ref BLOCK_PROCESSING_REQUESTS: IntCounter = register_int_counter!( + "block_processing_requests", + "Count of blocks sumbitted for processing" + ) + .unwrap(); + pub static ref BLOCK_PROCESSING_SUCCESSES: IntCounter = register_int_counter!( + "block_processing_successes", + "Count of blocks processed without error" + ) + .unwrap(); + pub static ref BLOCK_PROCESSING_TIMES: Histogram = + register_histogram!("block_processing_times", "Full runtime of block processing") + .unwrap(); + + /* + * Block Production + */ + pub static ref BLOCK_PRODUCTION_REQUESTS: IntCounter = register_int_counter!( + "block_production_requests", + "Count of all block production requests" + ) + .unwrap(); + pub static ref BLOCK_PRODUCTION_SUCCESSES: IntCounter = register_int_counter!( + "block_production_successes", + "Count of blocks sucessfully produced." 
+ ) + .unwrap(); + pub static ref BLOCK_PRODUCTION_TIMES: Histogram = + register_histogram!("block_production_times", "Full runtime of block production").unwrap(); + + /* + * Block Statistics + */ + pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Histogram = register_histogram!( + "operations_per_block_attestation", + "Number of attestations in a block" + ) + .unwrap(); + + /* + * Attestation Processing + */ + pub static ref ATTESTATION_PROCESSING_REQUESTS: IntCounter = register_int_counter!( + "attestation_processing_requests", + "Count of all attestations submitted for processing" + ) + .unwrap(); + pub static ref ATTESTATION_PROCESSING_SUCCESSES: IntCounter = register_int_counter!( + "attestation_processing_successes", + "total_attestation_processing_successes" + ) + .unwrap(); + pub static ref ATTESTATION_PROCESSING_TIMES: Histogram = register_histogram!( + "attestation_processing_times", + "Full runtime of attestation processing" + ) + .unwrap(); + + /* + * Attestation Production + */ + pub static ref ATTESTATION_PRODUCTION_REQUESTS: IntCounter = register_int_counter!( + "attestation_production_requests", + "Count of all attestation production requests" + ) + .unwrap(); + pub static ref ATTESTATION_PRODUCTION_SUCCESSES: IntCounter = register_int_counter!( + "attestation_production_successes", + "Count of attestations processed without error" + ) + .unwrap(); + pub static ref ATTESTATION_PRODUCTION_TIMES: Histogram = register_histogram!( + "attestation_production_times", + "Full runtime of attestation production" + ).unwrap(); + + /* + * Fork Choice + */ + pub static ref FORK_CHOICE_REQUESTS: IntCounter = register_int_counter!( + "fork_choice_requests", + "Count of occasions where fork choice has tried to find a head" + ) + .unwrap(); + pub static ref FORK_CHOICE_CHANGED_HEAD: IntCounter = register_int_counter!( + "fork_choice_changed_head", + "Count of occasions fork choice has found a new head" + ) + .unwrap(); + pub static ref FORK_CHOICE_REORG_COUNT: 
IntCounter = register_int_counter!( + "fork_choice_reorg_count", + "Count of occasions fork choice has switched to a different chain" + ) + .unwrap(); + pub static ref FORK_CHOICE_TIMES: Histogram = + register_histogram!("fork_choice_time", "Full runtime of fork choice").unwrap(); } pub fn gather_metrics() -> Vec { prometheus::gather() } - -pub struct Metrics { - pub block_processing_requests: IntCounter, - pub block_processing_successes: IntCounter, - pub block_processing_times: Histogram, - pub block_production_requests: IntCounter, - pub block_production_successes: IntCounter, - pub block_production_times: Histogram, - pub attestation_production_requests: IntCounter, - pub attestation_production_successes: IntCounter, - pub attestation_production_times: Histogram, - pub attestation_processing_requests: IntCounter, - pub attestation_processing_successes: IntCounter, - pub attestation_processing_times: Histogram, - pub fork_choice_requests: IntCounter, - pub fork_choice_changed_head: IntCounter, - pub fork_choice_reorg_count: IntCounter, - pub fork_choice_times: Histogram, - pub operations_per_block_attestation: Histogram, -} - -impl Metrics { - pub fn new() -> Result { - Ok(Self { - block_processing_requests: { - let opts = Opts::new("block_processing_requests", "total_blocks_processed"); - IntCounter::with_opts(opts)? - }, - block_processing_successes: { - let opts = Opts::new("block_processing_successes", "total_valid_blocks_processed"); - IntCounter::with_opts(opts)? - }, - block_processing_times: { - let opts = HistogramOpts::new("block_processing_times", "block_processing_time"); - Histogram::with_opts(opts)? - }, - block_production_requests: { - let opts = Opts::new("block_production_requests", "attempts_to_produce_new_block"); - IntCounter::with_opts(opts)? - }, - block_production_successes: { - let opts = Opts::new("block_production_successes", "blocks_successfully_produced"); - IntCounter::with_opts(opts)? 
- }, - block_production_times: { - let opts = HistogramOpts::new("block_production_times", "block_production_time"); - Histogram::with_opts(opts)? - }, - attestation_production_requests: { - let opts = Opts::new( - "attestation_production_requests", - "total_attestation_production_requests", - ); - IntCounter::with_opts(opts)? - }, - attestation_production_successes: { - let opts = Opts::new( - "attestation_production_successes", - "total_attestation_production_successes", - ); - IntCounter::with_opts(opts)? - }, - attestation_production_times: { - let opts = HistogramOpts::new( - "attestation_production_times", - "attestation_production_time", - ); - Histogram::with_opts(opts)? - }, - attestation_processing_requests: { - let opts = Opts::new( - "attestation_processing_requests", - "total_attestation_processing_requests", - ); - IntCounter::with_opts(opts)? - }, - attestation_processing_successes: { - let opts = Opts::new( - "attestation_processing_successes", - "total_attestation_processing_successes", - ); - IntCounter::with_opts(opts)? - }, - attestation_processing_times: { - let opts = HistogramOpts::new( - "attestation_processing_times", - "attestation_processing_time", - ); - Histogram::with_opts(opts)? - }, - fork_choice_requests: { - let opts = Opts::new("fork_choice_requests", "total_times_fork_choice_called"); - IntCounter::with_opts(opts)? - }, - fork_choice_changed_head: { - let opts = Opts::new( - "fork_choice_changed_head", - "total_times_fork_choice_chose_a_new_head", - ); - IntCounter::with_opts(opts)? - }, - fork_choice_reorg_count: { - let opts = Opts::new("fork_choice_reorg_count", "number_of_reorgs"); - IntCounter::with_opts(opts)? - }, - fork_choice_times: { - let opts = HistogramOpts::new("fork_choice_time", "total_time_to_run_fork_choice"); - Histogram::with_opts(opts)? 
- }, - operations_per_block_attestation: { - let opts = HistogramOpts::new( - "operations_per_block_attestation", - "count_of_attestations_per_block", - ); - Histogram::with_opts(opts)? - }, - }) - } - - pub fn register(&self, registry: &Registry) -> Result<(), Error> { - registry.register(Box::new(self.block_processing_requests.clone()))?; - registry.register(Box::new(self.block_processing_successes.clone()))?; - registry.register(Box::new(self.block_processing_times.clone()))?; - registry.register(Box::new(self.block_production_requests.clone()))?; - registry.register(Box::new(self.block_production_successes.clone()))?; - registry.register(Box::new(self.block_production_times.clone()))?; - registry.register(Box::new(self.attestation_production_requests.clone()))?; - registry.register(Box::new(self.attestation_production_successes.clone()))?; - registry.register(Box::new(self.attestation_production_times.clone()))?; - registry.register(Box::new(self.attestation_processing_requests.clone()))?; - registry.register(Box::new(self.attestation_processing_successes.clone()))?; - registry.register(Box::new(self.attestation_processing_times.clone()))?; - registry.register(Box::new(self.fork_choice_requests.clone()))?; - registry.register(Box::new(self.fork_choice_changed_head.clone()))?; - registry.register(Box::new(self.fork_choice_reorg_count.clone()))?; - registry.register(Box::new(self.fork_choice_times.clone()))?; - registry.register(Box::new(self.operations_per_block_attestation.clone()))?; - - Ok(()) - } -} diff --git a/beacon_node/client/src/lib.rs b/beacon_node/client/src/lib.rs index 65ba071fa..e06c5b60e 100644 --- a/beacon_node/client/src/lib.rs +++ b/beacon_node/client/src/lib.rs @@ -71,11 +71,6 @@ where eth2_config.spec.clone(), log.clone(), )?); - // Registry all beacon chain metrics with the global registry. 
- beacon_chain - .metrics - .register(&metrics_registry) - .expect("Failed to registry metrics"); if beacon_chain.read_slot_clock().is_none() { panic!("Cannot start client before genesis!") From e33d0703efcff8d37936968d3b7d591b4ab07b2a Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 14:43:31 +1000 Subject: [PATCH 03/26] Make metrics not panic if already defined --- beacon_node/beacon_chain/src/beacon_chain.rs | 51 +++++---- beacon_node/beacon_chain/src/metrics.rs | 113 ++++++++++--------- 2 files changed, 88 insertions(+), 76 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index df9523624..b0bb6a159 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -468,8 +468,8 @@ impl BeaconChain { state: &BeaconState, ) -> Result { // Collect some metrics. - metrics::ATTESTATION_PRODUCTION_REQUESTS.inc(); - let timer = metrics::ATTESTATION_PRODUCTION_TIMES.start_timer(); + metrics::inc_counter(&metrics::ATTESTATION_PRODUCTION_REQUESTS); + let timer = metrics::start_timer(&metrics::ATTESTATION_PRODUCTION_TIMES); let slots_per_epoch = T::EthSpec::slots_per_epoch(); let current_epoch_start_slot = state.current_epoch().start_slot(slots_per_epoch); @@ -516,8 +516,8 @@ impl BeaconChain { }; // Collect some metrics. 
- metrics::ATTESTATION_PRODUCTION_SUCCESSES.inc(); - timer.observe_duration(); + metrics::inc_counter(&metrics::ATTESTATION_PRODUCTION_SUCCESSES); + metrics::stop_timer(timer); Ok(AttestationData { beacon_block_root: head_block_root, @@ -703,8 +703,8 @@ impl BeaconChain { state: &BeaconState, block: &BeaconBlock, ) -> Result { - metrics::ATTESTATION_PROCESSING_REQUESTS.inc(); - let timer = metrics::ATTESTATION_PROCESSING_TIMES.start_timer(); + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_REQUESTS); + let timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_TIMES); // Find the highest between: // @@ -749,12 +749,12 @@ impl BeaconChain { .insert_attestation(attestation, state, &self.spec)?; // Update the metrics. - metrics::ATTESTATION_PROCESSING_SUCCESSES.inc(); + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_SUCCESSES); Ok(AttestationProcessingOutcome::Processed) }; - timer.observe_duration(); + timer.map(|t| t.observe_duration()); result } @@ -805,8 +805,8 @@ impl BeaconChain { &self, block: BeaconBlock, ) -> Result { - metrics::BLOCK_PROCESSING_REQUESTS.inc(); - let timer = metrics::BLOCK_PROCESSING_TIMES.start_timer(); + metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); + let timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); let finalized_slot = self .state @@ -846,7 +846,7 @@ impl BeaconChain { // Records the time taken to load the block and state from the database during block // processing. - let db_read_timer = metrics::BLOCK_PROCESSING_DB_READ.start_timer(); + let db_read_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_READ); // Load the blocks parent block from the database, returning invalid if that block is not // found. @@ -867,7 +867,7 @@ impl BeaconChain { .get(&parent_state_root)? .ok_or_else(|| Error::DBInconsistent(format!("Missing state {}", parent_state_root)))?; - db_read_timer.observe_duration(); + metrics::stop_timer(db_read_timer); // Transition the parent state to the block slot. 
let mut state: BeaconState = parent_state; @@ -921,9 +921,12 @@ impl BeaconChain { ) }; - metrics::BLOCK_PROCESSING_SUCCESSES.inc(); - metrics::OPERATIONS_PER_BLOCK_ATTESTATION.observe(block.body.attestations.len() as f64); - timer.observe_duration(); + metrics::inc_counter(&metrics::BLOCK_PROCESSING_SUCCESSES); + metrics::observe( + &metrics::OPERATIONS_PER_BLOCK_ATTESTATION, + block.body.attestations.len() as f64, + ); + metrics::stop_timer(timer); Ok(BlockProcessingOutcome::Processed { block_root }) } @@ -958,8 +961,8 @@ impl BeaconChain { produce_at_slot: Slot, randao_reveal: Signature, ) -> Result<(BeaconBlock, BeaconState), BlockProductionError> { - metrics::BLOCK_PRODUCTION_REQUESTS.inc(); - let timer = metrics::BLOCK_PRODUCTION_TIMES.start_timer(); + metrics::inc_counter(&metrics::BLOCK_PRODUCTION_REQUESTS); + let timer = metrics::start_timer(&metrics::BLOCK_PRODUCTION_TIMES); // If required, transition the new state to the present slot. while state.slot < produce_at_slot { @@ -1011,28 +1014,28 @@ impl BeaconChain { block.state_root = state_root; - metrics::BLOCK_PRODUCTION_SUCCESSES.inc(); - timer.observe_duration(); + metrics::inc_counter(&metrics::BLOCK_PRODUCTION_SUCCESSES); + metrics::stop_timer(timer); Ok((block, state)) } /// Execute the fork choice algorithm and enthrone the result as the canonical head. pub fn fork_choice(&self) -> Result<(), Error> { - metrics::FORK_CHOICE_REQUESTS.inc(); + metrics::inc_counter(&metrics::FORK_CHOICE_REQUESTS); // Start fork choice metrics timer. - let timer = metrics::FORK_CHOICE_TIMES.start_timer(); + let timer = metrics::start_timer(&metrics::FORK_CHOICE_TIMES); // Determine the root of the block that is the head of the chain. let beacon_block_root = self.fork_choice.find_head(&self)?; // End fork choice metrics timer. - timer.observe_duration(); + metrics::stop_timer(timer); // If a new head was chosen. 
if beacon_block_root != self.head().beacon_block_root { - metrics::FORK_CHOICE_CHANGED_HEAD.inc(); + metrics::inc_counter(&metrics::FORK_CHOICE_CHANGED_HEAD); let beacon_block: BeaconBlock = self .store @@ -1050,7 +1053,7 @@ impl BeaconChain { // If we switched to a new chain (instead of building atop the present chain). if self.head().beacon_block_root != beacon_block.parent_root { - metrics::FORK_CHOICE_REORG_COUNT.inc(); + metrics::inc_counter(&metrics::FORK_CHOICE_REORG_COUNT); warn!( self.log, "Beacon chain re-org"; diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 8b8307e93..417c2904a 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,111 +1,120 @@ pub use prometheus::Error; -use prometheus::{Histogram, IntCounter}; +use prometheus::{Histogram, HistogramTimer, IntCounter, Result}; + +pub fn start_timer(histogram: &Result) -> Option { + if let Ok(histogram) = histogram { + Some(histogram.start_timer()) + } else { + None + } +} + +pub fn stop_timer(timer: Option) { + timer.map(|t| t.observe_duration()); +} + +pub fn inc_counter(counter: &Result) { + if let Ok(counter) = counter { + counter.inc(); + } +} + +pub fn observe(histogram: &Result, value: f64) { + if let Ok(histogram) = histogram { + histogram.observe(value); + } +} lazy_static! 
{ /* * Block Processing */ - pub static ref BLOCK_PROCESSING_DB_READ: Histogram = register_histogram!( + pub static ref BLOCK_PROCESSING_DB_READ: Result = register_histogram!( "block_processing_db_read_times", "Time spent loading block and state from DB" - ) - .unwrap(); - pub static ref BLOCK_PROCESSING_REQUESTS: IntCounter = register_int_counter!( + ); + pub static ref BLOCK_PROCESSING_REQUESTS: Result = register_int_counter!( "block_processing_requests", "Count of blocks sumbitted for processing" - ) - .unwrap(); - pub static ref BLOCK_PROCESSING_SUCCESSES: IntCounter = register_int_counter!( + ); + pub static ref BLOCK_PROCESSING_SUCCESSES: Result = register_int_counter!( "block_processing_successes", "Count of blocks processed without error" - ) - .unwrap(); - pub static ref BLOCK_PROCESSING_TIMES: Histogram = - register_histogram!("block_processing_times", "Full runtime of block processing") - .unwrap(); + ); + pub static ref BLOCK_PROCESSING_TIMES: Result = + register_histogram!("block_processing_times", "Full runtime of block processing"); /* * Block Production */ - pub static ref BLOCK_PRODUCTION_REQUESTS: IntCounter = register_int_counter!( + pub static ref BLOCK_PRODUCTION_REQUESTS: Result = register_int_counter!( "block_production_requests", "Count of all block production requests" - ) - .unwrap(); - pub static ref BLOCK_PRODUCTION_SUCCESSES: IntCounter = register_int_counter!( + ); + pub static ref BLOCK_PRODUCTION_SUCCESSES: Result = register_int_counter!( "block_production_successes", "Count of blocks sucessfully produced." 
- ) - .unwrap(); - pub static ref BLOCK_PRODUCTION_TIMES: Histogram = - register_histogram!("block_production_times", "Full runtime of block production").unwrap(); + ); + pub static ref BLOCK_PRODUCTION_TIMES: Result = + register_histogram!("block_production_times", "Full runtime of block production"); /* * Block Statistics */ - pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Histogram = register_histogram!( + pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = register_histogram!( "operations_per_block_attestation", "Number of attestations in a block" - ) - .unwrap(); + ); /* * Attestation Processing */ - pub static ref ATTESTATION_PROCESSING_REQUESTS: IntCounter = register_int_counter!( + pub static ref ATTESTATION_PROCESSING_REQUESTS: Result = register_int_counter!( "attestation_processing_requests", "Count of all attestations submitted for processing" - ) - .unwrap(); - pub static ref ATTESTATION_PROCESSING_SUCCESSES: IntCounter = register_int_counter!( + ); + pub static ref ATTESTATION_PROCESSING_SUCCESSES: Result = register_int_counter!( "attestation_processing_successes", "total_attestation_processing_successes" - ) - .unwrap(); - pub static ref ATTESTATION_PROCESSING_TIMES: Histogram = register_histogram!( + ); + pub static ref ATTESTATION_PROCESSING_TIMES: Result = register_histogram!( "attestation_processing_times", "Full runtime of attestation processing" - ) - .unwrap(); + ); /* * Attestation Production */ - pub static ref ATTESTATION_PRODUCTION_REQUESTS: IntCounter = register_int_counter!( + pub static ref ATTESTATION_PRODUCTION_REQUESTS: Result = register_int_counter!( "attestation_production_requests", "Count of all attestation production requests" - ) - .unwrap(); - pub static ref ATTESTATION_PRODUCTION_SUCCESSES: IntCounter = register_int_counter!( + ); + pub static ref ATTESTATION_PRODUCTION_SUCCESSES: Result = register_int_counter!( "attestation_production_successes", "Count of attestations processed without error" - ) - .unwrap(); - pub 
static ref ATTESTATION_PRODUCTION_TIMES: Histogram = register_histogram!( + ); + pub static ref ATTESTATION_PRODUCTION_TIMES: Result = register_histogram!( "attestation_production_times", "Full runtime of attestation production" - ).unwrap(); + ); /* * Fork Choice */ - pub static ref FORK_CHOICE_REQUESTS: IntCounter = register_int_counter!( + pub static ref FORK_CHOICE_REQUESTS: Result = register_int_counter!( "fork_choice_requests", "Count of occasions where fork choice has tried to find a head" - ) - .unwrap(); - pub static ref FORK_CHOICE_CHANGED_HEAD: IntCounter = register_int_counter!( + ); + pub static ref FORK_CHOICE_CHANGED_HEAD: Result = register_int_counter!( "fork_choice_changed_head", "Count of occasions fork choice has found a new head" - ) - .unwrap(); - pub static ref FORK_CHOICE_REORG_COUNT: IntCounter = register_int_counter!( + ); + pub static ref FORK_CHOICE_REORG_COUNT: Result = register_int_counter!( "fork_choice_reorg_count", "Count of occasions fork choice has switched to a different chain" - ) - .unwrap(); - pub static ref FORK_CHOICE_TIMES: Histogram = - register_histogram!("fork_choice_time", "Full runtime of fork choice").unwrap(); + ); + pub static ref FORK_CHOICE_TIMES: Result = + register_histogram!("fork_choice_time", "Full runtime of fork choice"); } pub fn gather_metrics() -> Vec { From 36ff115b04a90f767e08d5b52754a643aa2c950d Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 14:46:20 +1000 Subject: [PATCH 04/26] Use global prometheus gather at rest api --- beacon_node/beacon_chain/src/metrics.rs | 4 ---- beacon_node/rest_api/src/metrics.rs | 4 +--- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 417c2904a..dc2919cc4 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -116,7 +116,3 @@ lazy_static! 
{ pub static ref FORK_CHOICE_TIMES: Result = register_histogram!("fork_choice_time", "Full runtime of fork choice"); } - -pub fn gather_metrics() -> Vec { - prometheus::gather() -} diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index 1ecdf8b68..b0f5b8605 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -7,9 +7,7 @@ pub fn get_prometheus(_req: Request) -> ApiResult { let mut buffer = vec![]; let encoder = TextEncoder::new(); - encoder - .encode(&beacon_chain::gather_metrics(), &mut buffer) - .unwrap(); + encoder.encode(&prometheus::gather(), &mut buffer).unwrap(); String::from_utf8(buffer) .map(|string| success_response(Body::from(string))) From 2108895fca7c34928b7d0540d8ea0c84740d56ac Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 15:34:10 +1000 Subject: [PATCH 05/26] Unify common metric fns into a crate --- Cargo.toml | 1 + beacon_node/beacon_chain/Cargo.toml | 2 +- beacon_node/beacon_chain/src/errors.rs | 8 --- beacon_node/beacon_chain/src/lib.rs | 3 -- beacon_node/beacon_chain/src/metrics.rs | 67 ++++++++---------------- eth2/utils/lighthouse_metrics/Cargo.toml | 11 ++++ eth2/utils/lighthouse_metrics/src/lib.rs | 49 +++++++++++++++++ 7 files changed, 83 insertions(+), 58 deletions(-) create mode 100644 eth2/utils/lighthouse_metrics/Cargo.toml create mode 100644 eth2/utils/lighthouse_metrics/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index f5ee02a17..9b7b87a0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "eth2/utils/eth2_interop_keypairs", "eth2/utils/logging", "eth2/utils/eth2_hashing", + "eth2/utils/lighthouse_metrics", "eth2/utils/merkle_proof", "eth2/utils/int_to_bytes", "eth2/utils/serde_hex", diff --git a/beacon_node/beacon_chain/Cargo.toml b/beacon_node/beacon_chain/Cargo.toml index 43e7614b6..850aa2e94 100644 --- a/beacon_node/beacon_chain/Cargo.toml +++ b/beacon_node/beacon_chain/Cargo.toml @@ -7,7 +7,7 @@ edition = 
"2018" [dependencies] store = { path = "../store" } parking_lot = "0.7" -prometheus = "^0.6" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } log = "0.4" operation_pool = { path = "../../eth2/operation_pool" } serde = "1.0" diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index 7a51fc425..22df90397 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -1,5 +1,4 @@ use crate::fork_choice::Error as ForkChoiceError; -use crate::metrics::Error as MetricsError; use state_processing::per_block_processing::errors::{ AttestationValidationError, IndexedAttestationValidationError, }; @@ -34,7 +33,6 @@ pub enum BeaconChainError { MissingBeaconBlock(Hash256), MissingBeaconState(Hash256), SlotProcessingError(SlotProcessingError), - MetricsError(String), NoStateForAttestation { beacon_block_root: Hash256, }, @@ -44,12 +42,6 @@ pub enum BeaconChainError { easy_from_to!(SlotProcessingError, BeaconChainError); -impl From for BeaconChainError { - fn from(e: MetricsError) -> BeaconChainError { - BeaconChainError::MetricsError(format!("{:?}", e)) - } -} - #[derive(Debug, PartialEq)] pub enum BlockProductionError { UnableToGetBlockRootFromState, diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index e24534a2e..98bd60a35 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -1,6 +1,4 @@ #[macro_use] -extern crate prometheus; -#[macro_use] extern crate lazy_static; mod beacon_chain; @@ -18,7 +16,6 @@ pub use self::beacon_chain::{ pub use self::checkpoint::CheckPoint; pub use self::errors::{BeaconChainError, BlockProductionError}; pub use lmd_ghost; -pub use metrics::gather_metrics; pub use parking_lot; pub use slot_clock; pub use state_processing::per_block_processing::errors::{ diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index dc2919cc4..03f4783ff 
100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,67 +1,42 @@ -pub use prometheus::Error; -use prometheus::{Histogram, HistogramTimer, IntCounter, Result}; - -pub fn start_timer(histogram: &Result) -> Option { - if let Ok(histogram) = histogram { - Some(histogram.start_timer()) - } else { - None - } -} - -pub fn stop_timer(timer: Option) { - timer.map(|t| t.observe_duration()); -} - -pub fn inc_counter(counter: &Result) { - if let Ok(counter) = counter { - counter.inc(); - } -} - -pub fn observe(histogram: &Result, value: f64) { - if let Ok(histogram) = histogram { - histogram.observe(value); - } -} +pub use lighthouse_metrics::*; lazy_static! { /* * Block Processing */ - pub static ref BLOCK_PROCESSING_DB_READ: Result = register_histogram!( + pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( "block_processing_db_read_times", "Time spent loading block and state from DB" ); - pub static ref BLOCK_PROCESSING_REQUESTS: Result = register_int_counter!( + pub static ref BLOCK_PROCESSING_REQUESTS: Result = try_create_int_counter( "block_processing_requests", - "Count of blocks sumbitted for processing" + "Count of blocks submitted for processing" ); - pub static ref BLOCK_PROCESSING_SUCCESSES: Result = register_int_counter!( + pub static ref BLOCK_PROCESSING_SUCCESSES: Result = try_create_int_counter( "block_processing_successes", "Count of blocks processed without error" ); pub static ref BLOCK_PROCESSING_TIMES: Result = - register_histogram!("block_processing_times", "Full runtime of block processing"); + try_create_histogram("block_processing_times", "Full runtime of block processing"); /* * Block Production */ - pub static ref BLOCK_PRODUCTION_REQUESTS: Result = register_int_counter!( + pub static ref BLOCK_PRODUCTION_REQUESTS: Result = try_create_int_counter( "block_production_requests", "Count of all block production requests" ); - pub static ref BLOCK_PRODUCTION_SUCCESSES: Result = 
register_int_counter!( + pub static ref BLOCK_PRODUCTION_SUCCESSES: Result = try_create_int_counter( "block_production_successes", - "Count of blocks sucessfully produced." + "Count of blocks successfully produced." ); pub static ref BLOCK_PRODUCTION_TIMES: Result = - register_histogram!("block_production_times", "Full runtime of block production"); + try_create_histogram("block_production_times", "Full runtime of block production"); /* * Block Statistics */ - pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = register_histogram!( + pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = try_create_histogram( "operations_per_block_attestation", "Number of attestations in a block" ); @@ -69,15 +44,15 @@ lazy_static! { /* * Attestation Processing */ - pub static ref ATTESTATION_PROCESSING_REQUESTS: Result = register_int_counter!( + pub static ref ATTESTATION_PROCESSING_REQUESTS: Result = try_create_int_counter( "attestation_processing_requests", "Count of all attestations submitted for processing" ); - pub static ref ATTESTATION_PROCESSING_SUCCESSES: Result = register_int_counter!( + pub static ref ATTESTATION_PROCESSING_SUCCESSES: Result = try_create_int_counter( "attestation_processing_successes", "total_attestation_processing_successes" ); - pub static ref ATTESTATION_PROCESSING_TIMES: Result = register_histogram!( + pub static ref ATTESTATION_PROCESSING_TIMES: Result = try_create_histogram( "attestation_processing_times", "Full runtime of attestation processing" ); @@ -85,15 +60,15 @@ lazy_static! 
{ /* * Attestation Production */ - pub static ref ATTESTATION_PRODUCTION_REQUESTS: Result = register_int_counter!( + pub static ref ATTESTATION_PRODUCTION_REQUESTS: Result = try_create_int_counter( "attestation_production_requests", "Count of all attestation production requests" ); - pub static ref ATTESTATION_PRODUCTION_SUCCESSES: Result = register_int_counter!( + pub static ref ATTESTATION_PRODUCTION_SUCCESSES: Result = try_create_int_counter( "attestation_production_successes", "Count of attestations processed without error" ); - pub static ref ATTESTATION_PRODUCTION_TIMES: Result = register_histogram!( + pub static ref ATTESTATION_PRODUCTION_TIMES: Result = try_create_histogram( "attestation_production_times", "Full runtime of attestation production" ); @@ -101,18 +76,18 @@ lazy_static! { /* * Fork Choice */ - pub static ref FORK_CHOICE_REQUESTS: Result = register_int_counter!( + pub static ref FORK_CHOICE_REQUESTS: Result = try_create_int_counter( "fork_choice_requests", "Count of occasions where fork choice has tried to find a head" ); - pub static ref FORK_CHOICE_CHANGED_HEAD: Result = register_int_counter!( + pub static ref FORK_CHOICE_CHANGED_HEAD: Result = try_create_int_counter( "fork_choice_changed_head", "Count of occasions fork choice has found a new head" ); - pub static ref FORK_CHOICE_REORG_COUNT: Result = register_int_counter!( + pub static ref FORK_CHOICE_REORG_COUNT: Result = try_create_int_counter( "fork_choice_reorg_count", "Count of occasions fork choice has switched to a different chain" ); pub static ref FORK_CHOICE_TIMES: Result = - register_histogram!("fork_choice_time", "Full runtime of fork choice"); + try_create_histogram("fork_choice_time", "Full runtime of fork choice"); } diff --git a/eth2/utils/lighthouse_metrics/Cargo.toml b/eth2/utils/lighthouse_metrics/Cargo.toml new file mode 100644 index 000000000..0a24a96fb --- /dev/null +++ b/eth2/utils/lighthouse_metrics/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "lighthouse_metrics" 
+version = "0.1.0" +authors = ["Paul Hauner "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lazy_static = "1.3.0" +prometheus = "^0.6" diff --git a/eth2/utils/lighthouse_metrics/src/lib.rs b/eth2/utils/lighthouse_metrics/src/lib.rs new file mode 100644 index 000000000..e6e30f6bb --- /dev/null +++ b/eth2/utils/lighthouse_metrics/src/lib.rs @@ -0,0 +1,49 @@ +use prometheus::{HistogramOpts, HistogramTimer, Opts}; + +pub use prometheus::{Histogram, IntCounter, Result}; + +pub fn try_create_int_counter(name: &str, help: &str) -> Result { + let opts = Opts::new(name, help); + let counter = IntCounter::with_opts(opts)?; + prometheus::register(Box::new(counter.clone()))?; + Ok(counter) +} + +pub fn try_create_histogram(name: &str, help: &str) -> Result { + let opts = HistogramOpts::new(name, help); + let histogram = Histogram::with_opts(opts)?; + prometheus::register(Box::new(histogram.clone()))?; + Ok(histogram) +} + +pub fn start_timer(histogram: &Result) -> Option { + if let Ok(histogram) = histogram { + Some(histogram.start_timer()) + } else { + None + } +} + +pub fn stop_timer(timer: Option) { + timer.map(|t| t.observe_duration()); +} + +pub fn inc_counter(counter: &Result) { + if let Ok(counter) = counter { + counter.inc(); + } +} + +pub fn observe(histogram: &Result, value: f64) { + if let Ok(histogram) = histogram { + histogram.observe(value); + } +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} From 441eb41b6bd3a36d5f673c23d392ef5a9796706d Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 15:53:34 +1000 Subject: [PATCH 06/26] Add heavy metering to block processing --- beacon_node/beacon_chain/src/beacon_chain.rs | 34 ++++++++++++++++-- beacon_node/beacon_chain/src/metrics.rs | 36 +++++++++++++++++--- 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs 
b/beacon_node/beacon_chain/src/beacon_chain.rs index b0bb6a159..f5fb954b9 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -806,7 +806,7 @@ impl BeaconChain { block: BeaconBlock, ) -> Result { metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); - let timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); + let full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); let finalized_slot = self .state @@ -869,15 +869,25 @@ impl BeaconChain { metrics::stop_timer(db_read_timer); + let catchup_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CATCHUP_STATE); + // Transition the parent state to the block slot. let mut state: BeaconState = parent_state; for _ in state.slot.as_u64()..block.slot.as_u64() { per_slot_processing(&mut state, &self.spec)?; } + metrics::stop_timer(catchup_timer); + + let commitee_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_COMMITTEE); + state.build_committee_cache(RelativeEpoch::Previous, &self.spec)?; state.build_committee_cache(RelativeEpoch::Current, &self.spec)?; + metrics::stop_timer(commitee_timer); + + let core_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CORE); + // Apply the received block to its parent state (which has been transitioned into this // slot). match per_block_processing(&mut state, &block, &self.spec) { @@ -888,16 +898,29 @@ impl BeaconChain { _ => {} } + metrics::stop_timer(core_timer); + + let state_root_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_STATE_ROOT); + let state_root = state.canonical_root(); if block.state_root != state_root { return Ok(BlockProcessingOutcome::StateRootMismatch); } + metrics::stop_timer(state_root_timer); + + let db_write_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_WRITE); + // Store the block and state. 
self.store.put(&block_root, &block)?; self.store.put(&state_root, &state)?; + metrics::stop_timer(db_write_timer); + + let fork_choice_register_timer = + metrics::start_timer(&metrics::BLOCK_PROCESSING_FORK_CHOICE_REGISTER); + // Register the new block with the fork choice service. if let Err(e) = self.fork_choice.process_block(&state, &block, block_root) { error!( @@ -909,6 +932,11 @@ impl BeaconChain { ) } + metrics::stop_timer(fork_choice_register_timer); + + let find_head_timer = + metrics::start_timer(&metrics::BLOCK_PROCESSING_FORK_CHOICE_FIND_HEAD); + // Execute the fork choice algorithm, enthroning a new head if discovered. // // Note: in the future we may choose to run fork-choice less often, potentially based upon @@ -921,12 +949,14 @@ impl BeaconChain { ) }; + metrics::stop_timer(find_head_timer); + metrics::inc_counter(&metrics::BLOCK_PROCESSING_SUCCESSES); metrics::observe( &metrics::OPERATIONS_PER_BLOCK_ATTESTATION, block.body.attestations.len() as f64, ); - metrics::stop_timer(timer); + metrics::stop_timer(full_timer); Ok(BlockProcessingOutcome::Processed { block_root }) } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 03f4783ff..38a7af9e1 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -4,10 +4,6 @@ lazy_static! { /* * Block Processing */ - pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( - "block_processing_db_read_times", - "Time spent loading block and state from DB" - ); pub static ref BLOCK_PROCESSING_REQUESTS: Result = try_create_int_counter( "block_processing_requests", "Count of blocks submitted for processing" @@ -18,6 +14,38 @@ lazy_static! 
{ ); pub static ref BLOCK_PROCESSING_TIMES: Result = try_create_histogram("block_processing_times", "Full runtime of block processing"); + pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( + "block_processing_db_read_times", + "Time spent loading block and state from DB for block processing" + ); + pub static ref BLOCK_PROCESSING_CATCHUP_STATE: Result = try_create_histogram( + "block_processing_catch-up_state_times", + "Time spent skipping slots on a state before processing a block." + ); + pub static ref BLOCK_PROCESSING_COMMITTEE: Result = try_create_histogram( + "block_processing_committee_building_times", + "Time spent building/obtaining committees for block processing." + ); + pub static ref BLOCK_PROCESSING_CORE: Result = try_create_histogram( + "block_processing_core_times", + "Time spent doing the core per_block_processing state processing." + ); + pub static ref BLOCK_PROCESSING_STATE_ROOT: Result = try_create_histogram( + "block_processing_state_root_times", + "Time spent calculating the state root when processing a block." 
+ ); + pub static ref BLOCK_PROCESSING_DB_WRITE: Result = try_create_histogram( + "block_processing_db_write_times", + "Time spent writing a newly processed block and state to DB" + ); + pub static ref BLOCK_PROCESSING_FORK_CHOICE_REGISTER: Result = try_create_histogram( + "block_processing_fork_choice_register_times", + "Time spent registering the new block with fork choice (but not finding head)" + ); + pub static ref BLOCK_PROCESSING_FORK_CHOICE_FIND_HEAD: Result = try_create_histogram( + "block_processing_fork_choice_find_head_times", + "Time spent finding the new head after processing a new block" + ); /* * Block Production From 76f42ac7ffd7d25e4c92393370b5b4717cacab49 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 16:15:26 +1000 Subject: [PATCH 07/26] Remove hypen from prometheus metric name --- beacon_node/beacon_chain/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 38a7af9e1..d0b6e27fc 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -19,7 +19,7 @@ lazy_static! { "Time spent loading block and state from DB for block processing" ); pub static ref BLOCK_PROCESSING_CATCHUP_STATE: Result = try_create_histogram( - "block_processing_catch-up_state_times", + "block_processing_catch_up_state_times", "Time spent skipping slots on a state before processing a block." 
); pub static ref BLOCK_PROCESSING_COMMITTEE: Result = try_create_histogram( From 42d300bdc35df563598fcd65488b5fb21342a60b Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 17:49:32 +1000 Subject: [PATCH 08/26] Add more beacon chain metrics --- beacon_node/beacon_chain/src/beacon_chain.rs | 18 ++++++++++++++---- beacon_node/beacon_chain/src/fork_choice.rs | 20 +++++++++++++++++--- beacon_node/beacon_chain/src/metrics.rs | 20 ++++++++++++++++++++ 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index f5fb954b9..6f9a2b414 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -1060,11 +1060,8 @@ impl BeaconChain { // Determine the root of the block that is the head of the chain. let beacon_block_root = self.fork_choice.find_head(&self)?; - // End fork choice metrics timer. - metrics::stop_timer(timer); - // If a new head was chosen. - if beacon_block_root != self.head().beacon_block_root { + let result = if beacon_block_root != self.head().beacon_block_root { metrics::inc_counter(&metrics::FORK_CHOICE_CHANGED_HEAD); let beacon_block: BeaconBlock = self @@ -1127,11 +1124,22 @@ impl BeaconChain { } } else { Ok(()) + }; + + // End fork choice metrics timer. + metrics::stop_timer(timer); + + if let Err(_) = result { + metrics::inc_counter(&metrics::FORK_CHOICE_ERRORS); } + + result } /// Update the canonical head to `new_head`. fn update_canonical_head(&self, new_head: CheckPoint) -> Result<(), Error> { + let timer = metrics::start_timer(&metrics::UPDATE_HEAD_TIMES); + // Update the checkpoint that stores the head of the chain at the time it received the // block. *self.canonical_head.write() = new_head; @@ -1158,6 +1166,8 @@ impl BeaconChain { // Save `self` to `self.store`. 
self.persist()?; + metrics::stop_timer(timer); + Ok(()) } diff --git a/beacon_node/beacon_chain/src/fork_choice.rs b/beacon_node/beacon_chain/src/fork_choice.rs index edd426f29..77fdaacdc 100644 --- a/beacon_node/beacon_chain/src/fork_choice.rs +++ b/beacon_node/beacon_chain/src/fork_choice.rs @@ -1,4 +1,4 @@ -use crate::{BeaconChain, BeaconChainTypes}; +use crate::{metrics, BeaconChain, BeaconChainTypes}; use lmd_ghost::LmdGhost; use state_processing::common::get_attesting_indices; use std::sync::Arc; @@ -46,6 +46,8 @@ impl ForkChoice { } pub fn find_head(&self, chain: &BeaconChain) -> Result { + let timer = metrics::start_timer(&metrics::FORK_CHOICE_FIND_HEAD_TIMES); + let start_slot = |epoch: Epoch| epoch.start_slot(T::EthSpec::slots_per_epoch()); // From the specification: @@ -97,9 +99,14 @@ impl ForkChoice { .map(|v| v.effective_balance) }; - self.backend + let result = self + .backend .find_head(start_block_slot, start_block_root, weight) - .map_err(Into::into) + .map_err(Into::into); + + metrics::stop_timer(timer); + + result } /// Process all attestations in the given `block`. @@ -112,6 +119,7 @@ impl ForkChoice { block: &BeaconBlock, block_root: Hash256, ) -> Result<()> { + let timer = metrics::start_timer(&metrics::FORK_CHOICE_PROCESS_BLOCK_TIMES); // Note: we never count the block as a latest message, only attestations. // // I (Paul H) do not have an explicit reference to this, but I derive it from this @@ -136,6 +144,8 @@ impl ForkChoice { // a block that has the majority of votes applied to it. self.backend.process_block(block, block_root)?; + metrics::stop_timer(timer); + Ok(()) } @@ -148,6 +158,8 @@ impl ForkChoice { attestation: &Attestation, block: &BeaconBlock, ) -> Result<()> { + let timer = metrics::start_timer(&metrics::FORK_CHOICE_PROCESS_ATTESTATION_TIMES); + let block_hash = attestation.data.beacon_block_root; // Ignore any attestations to the zero hash. 
@@ -175,6 +187,8 @@ impl ForkChoice { } } + metrics::stop_timer(timer); + Ok(()) } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index d0b6e27fc..34f359ad8 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -108,6 +108,10 @@ lazy_static! { "fork_choice_requests", "Count of occasions where fork choice has tried to find a head" ); + pub static ref FORK_CHOICE_ERRORS: Result = try_create_int_counter( + "fork_choice_errors", + "Count of occasions where fork choice has returned an error when trying to find a head" + ); pub static ref FORK_CHOICE_CHANGED_HEAD: Result = try_create_int_counter( "fork_choice_changed_head", "Count of occasions fork choice has found a new head" @@ -118,4 +122,20 @@ lazy_static! { ); pub static ref FORK_CHOICE_TIMES: Result = try_create_histogram("fork_choice_time", "Full runtime of fork choice"); + pub static ref FORK_CHOICE_FIND_HEAD_TIMES: Result = + try_create_histogram("fork_choice_find_head_time", "Full runtime of fork choice find_head function"); + pub static ref FORK_CHOICE_PROCESS_BLOCK_TIMES: Result = try_create_histogram( + "fork_choice_process_block_time", + "Time taken to add a block and all attestations to fork choice" + ); + pub static ref FORK_CHOICE_PROCESS_ATTESTATION_TIMES: Result = try_create_histogram( + "fork_choice_process_attestation_time", + "Time taken to add an attestation to fork choice" + ); + + /* + * Head Updating + */ + pub static ref UPDATE_HEAD_TIMES: Result = + try_create_histogram("update_head_times", "Time taken to update the canonical head"); } From 78db947e6e65f4d0960ca5b9340305c663856244 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 18:28:57 +1000 Subject: [PATCH 09/26] Add beacon chain persistence metric --- beacon_node/beacon_chain/src/beacon_chain.rs | 4 ++++ beacon_node/beacon_chain/src/metrics.rs | 6 ++++++ 2 files changed, 10 insertions(+) diff --git 
a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 6f9a2b414..96ff339a6 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -199,6 +199,8 @@ impl BeaconChain { /// Attempt to save this instance to `self.store`. pub fn persist(&self) -> Result<(), Error> { + let timer = metrics::start_timer(&metrics::PERSIST_CHAIN); + let p: PersistedBeaconChain = PersistedBeaconChain { canonical_head: self.canonical_head.read().clone(), op_pool: PersistedOperationPool::from_operation_pool(&self.op_pool), @@ -209,6 +211,8 @@ impl BeaconChain { let key = Hash256::from_slice(&BEACON_CHAIN_DB_KEY.as_bytes()); self.store.put(&key, &p)?; + metrics::stop_timer(timer); + Ok(()) } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 34f359ad8..b91125463 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -138,4 +138,10 @@ lazy_static! 
{ */ pub static ref UPDATE_HEAD_TIMES: Result = try_create_histogram("update_head_times", "Time taken to update the canonical head"); + + /* + * Persisting BeaconChain to disk + */ + pub static ref PERSIST_CHAIN: Result = + try_create_histogram("persist_chain", "Time taken to update the canonical head"); } From 6150f0ae1a549dcc1d76c831c4ca5cae03300dd7 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Sun, 11 Aug 2019 18:29:11 +1000 Subject: [PATCH 10/26] Prune op pool on finalization --- beacon_node/beacon_chain/src/beacon_chain.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 96ff339a6..0e0583309 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -1199,6 +1199,9 @@ impl BeaconChain { self.fork_choice .process_finalization(&finalized_block, finalized_block_root)?; + self.op_pool + .prune_all(&self.head().beacon_state, &self.spec); + Ok(()) } } From 7140dbc45da4a8895a155450f1777fa1655991ac Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 13:26:58 +1000 Subject: [PATCH 11/26] Add extra prom beacon chain metrics --- beacon_node/beacon_chain/Cargo.toml | 2 +- beacon_node/beacon_chain/src/lib.rs | 1 + beacon_node/beacon_chain/src/metrics.rs | 156 ++++++++++++++++++++++- beacon_node/client/src/lib.rs | 1 + beacon_node/rest_api/src/lib.rs | 20 ++- beacon_node/rest_api/src/metrics.rs | 18 ++- beacon_node/store/Cargo.toml | 2 + beacon_node/store/src/lib.rs | 4 + beacon_node/store/src/metrics.rs | 25 ++++ eth2/utils/lighthouse_metrics/src/lib.rs | 15 ++- 10 files changed, 233 insertions(+), 11 deletions(-) create mode 100644 beacon_node/store/src/metrics.rs diff --git a/beacon_node/beacon_chain/Cargo.toml b/beacon_node/beacon_chain/Cargo.toml index 850aa2e94..462d44e92 100644 --- a/beacon_node/beacon_chain/Cargo.toml +++ b/beacon_node/beacon_chain/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" 
[dependencies] store = { path = "../store" } parking_lot = "0.7" +lazy_static = "1.3.0" lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } log = "0.4" operation_pool = { path = "../../eth2/operation_pool" } @@ -17,7 +18,6 @@ sloggers = { version = "^0.3" } slot_clock = { path = "../../eth2/utils/slot_clock" } eth2_ssz = "0.1" eth2_ssz_derive = "0.1" -lazy_static = "1.3.0" state_processing = { path = "../../eth2/state_processing" } tree_hash = "0.1" types = { path = "../../eth2/types" } diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index 98bd60a35..1262bc537 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -16,6 +16,7 @@ pub use self::beacon_chain::{ pub use self::checkpoint::CheckPoint; pub use self::errors::{BeaconChainError, BlockProductionError}; pub use lmd_ghost; +pub use metrics::scrape_for_metrics; pub use parking_lot; pub use slot_clock; pub use state_processing::per_block_processing::errors::{ diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index b91125463..6ed8218f0 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,4 +1,6 @@ +use crate::{BeaconChain, BeaconChainTypes}; pub use lighthouse_metrics::*; +use types::{BeaconState, Epoch, EthSpec, Hash256, Slot}; lazy_static! { /* @@ -133,15 +135,157 @@ lazy_static! { "Time taken to add an attestation to fork choice" ); - /* - * Head Updating - */ - pub static ref UPDATE_HEAD_TIMES: Result = - try_create_histogram("update_head_times", "Time taken to update the canonical head"); - /* * Persisting BeaconChain to disk */ pub static ref PERSIST_CHAIN: Result = try_create_histogram("persist_chain", "Time taken to update the canonical head"); } + +// Lazy-static is split so we don't reach the crate-level recursion limit. +lazy_static! 
{ + /* + * Slot Clock + */ + pub static ref PRESENT_SLOT: Result = + try_create_int_gauge("present_slot", "The present slot, according to system time"); + pub static ref PRESENT_EPOCH: Result = + try_create_int_gauge("present_epoch", "The present epoch, according to system time"); + + /* + * Chain Head + */ + pub static ref UPDATE_HEAD_TIMES: Result = + try_create_histogram("update_head_times", "Time taken to update the canonical head"); + pub static ref HEAD_STATE_SLOT: Result = + try_create_int_gauge("head_state_slot", "Slot of the block at the head of the chain"); + pub static ref HEAD_STATE_ROOT: Result = + try_create_int_gauge("head_state_root", "Root of the state at the head of the chain"); + pub static ref HEAD_STATE_LATEST_BLOCK_SLOT: Result = + try_create_int_gauge("head_state_latest_block_slot", "Latest block slot at the head of the chain"); + pub static ref HEAD_STATE_CURRENT_JUSTIFIED_ROOT: Result = + try_create_int_gauge("head_state_current_justified_root", "Current justified root at the head of the chain"); + pub static ref HEAD_STATE_CURRENT_JUSTIFIED_EPOCH: Result = + try_create_int_gauge("head_state_current_justified_epoch", "Current justified epoch at the head of the chain"); + pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT: Result = + try_create_int_gauge("head_state_previous_justified_root", "Previous justified root at the head of the chain"); + pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH: Result = + try_create_int_gauge("head_state_previous_justified_epoch", "Previous justified epoch at the head of the chain"); + pub static ref HEAD_STATE_FINALIZED_ROOT: Result = + try_create_int_gauge("head_state_finalized_root", "Finalized root at the head of the chain"); + pub static ref HEAD_STATE_FINALIZED_EPOCH: Result = + try_create_int_gauge("head_state_finalized_epoch", "Finalized epoch at the head of the chain"); + pub static ref HEAD_STATE_TOTAL_VALIDATORS: Result = + try_create_int_gauge("head_state_total_validators", "Count of validators 
at the head of the chain"); + pub static ref HEAD_STATE_ACTIVE_VALIDATORS: Result = + try_create_int_gauge("head_state_active_validators", "Count of active validators at the head of the chain"); + pub static ref HEAD_STATE_VALIDATOR_BALANCES: Result = + try_create_int_gauge("head_state_validator_balances", "Sum of all validator balances at the head of the chain"); + pub static ref HEAD_STATE_SLASHED_VALIDATORS: Result = + try_create_int_gauge("head_state_slashed_validators", "Count of all slashed validators at the head of the chain"); + pub static ref HEAD_STATE_WITHDRAWN_VALIDATORS: Result = + try_create_int_gauge("head_state_withdrawn_validators", "Sum of all validator balances at the head of the chain"); + pub static ref HEAD_STATE_ETH1_DEPOSIT_INDEX: Result = + try_create_int_gauge("head_state_eth1_deposit_index", "Eth1 deposit index at the head of the chain"); +} + +/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot, +/// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`. +pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { + set_gauge_by_slot( + &PRESENT_SLOT, + beacon_chain + .read_slot_clock() + .unwrap_or_else(|| Slot::new(0)), + ); + + set_gauge_by_epoch( + &PRESENT_EPOCH, + beacon_chain + .read_slot_clock() + .map(|s| s.epoch(T::EthSpec::slots_per_epoch())) + .unwrap_or_else(|| Epoch::new(0)), + ); + + scrape_head_state::( + &beacon_chain.head().beacon_state, + beacon_chain.head().beacon_state_root, + ); +} + +/// Scrape the given `state` assuming it's the head state, updating the `DEFAULT_REGISTRY`. 
+fn scrape_head_state(state: &BeaconState, state_root: Hash256) { + set_gauge_by_slot(&HEAD_STATE_SLOT, state.slot); + set_gauge_by_hash(&HEAD_STATE_ROOT, state_root); + set_gauge_by_slot( + &HEAD_STATE_LATEST_BLOCK_SLOT, + state.latest_block_header.slot, + ); + set_gauge_by_hash( + &HEAD_STATE_CURRENT_JUSTIFIED_ROOT, + state.current_justified_checkpoint.root, + ); + set_gauge_by_epoch( + &HEAD_STATE_CURRENT_JUSTIFIED_EPOCH, + state.current_justified_checkpoint.epoch, + ); + set_gauge_by_hash( + &HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT, + state.previous_justified_checkpoint.root, + ); + set_gauge_by_epoch( + &HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH, + state.previous_justified_checkpoint.epoch, + ); + set_gauge_by_hash(&HEAD_STATE_FINALIZED_ROOT, state.finalized_checkpoint.root); + set_gauge_by_epoch( + &HEAD_STATE_FINALIZED_EPOCH, + state.finalized_checkpoint.epoch, + ); + set_gauge_by_usize(&HEAD_STATE_TOTAL_VALIDATORS, state.validators.len()); + set_gauge_by_u64( + &HEAD_STATE_VALIDATOR_BALANCES, + state.balances.iter().fold(0_u64, |acc, i| acc + i), + ); + set_gauge_by_usize( + &HEAD_STATE_ACTIVE_VALIDATORS, + state + .validators + .iter() + .filter(|v| v.is_active_at(state.current_epoch())) + .count(), + ); + set_gauge_by_usize( + &HEAD_STATE_SLASHED_VALIDATORS, + state.validators.iter().filter(|v| v.slashed).count(), + ); + set_gauge_by_usize( + &HEAD_STATE_WITHDRAWN_VALIDATORS, + state + .validators + .iter() + .filter(|v| v.is_withdrawable_at(state.current_epoch())) + .count(), + ); + set_gauge_by_u64(&HEAD_STATE_ETH1_DEPOSIT_INDEX, state.eth1_deposit_index); +} + +fn set_gauge_by_slot(gauge: &Result, value: Slot) { + set_gauge(gauge, value.as_u64() as i64); +} + +fn set_gauge_by_epoch(gauge: &Result, value: Epoch) { + set_gauge(gauge, value.as_u64() as i64); +} + +fn set_gauge_by_hash(gauge: &Result, value: Hash256) { + set_gauge(gauge, value.to_low_u64_le() as i64); +} + +fn set_gauge_by_usize(gauge: &Result, value: usize) { + set_gauge(gauge, value as i64); +} + 
+fn set_gauge_by_u64(gauge: &Result, value: u64) { + set_gauge(gauge, value as i64); +} diff --git a/beacon_node/client/src/lib.rs b/beacon_node/client/src/lib.rs index e06c5b60e..c74787f60 100644 --- a/beacon_node/client/src/lib.rs +++ b/beacon_node/client/src/lib.rs @@ -142,6 +142,7 @@ where &client_config.rest_api, executor, beacon_chain.clone(), + client_config.db_path().expect("unable to read datadir"), &log, ) { Ok(s) => Some(s), diff --git a/beacon_node/rest_api/src/lib.rs b/beacon_node/rest_api/src/lib.rs index 7dc0df578..fea67618b 100644 --- a/beacon_node/rest_api/src/lib.rs +++ b/beacon_node/rest_api/src/lib.rs @@ -13,6 +13,8 @@ use hyper::rt::Future; use hyper::service::service_fn_ok; use hyper::{Body, Method, Response, Server, StatusCode}; use slog::{info, o, warn}; +use std::ops::Deref; +use std::path::PathBuf; use std::sync::Arc; use tokio::runtime::TaskExecutor; use url_query::UrlQuery; @@ -68,6 +70,7 @@ pub fn start_server( config: &ApiConfig, executor: &TaskExecutor, beacon_chain: Arc>, + db_path: PathBuf, log: &slog::Logger, ) -> Result { let log = log.new(o!("Service" => "Api")); @@ -81,6 +84,8 @@ pub fn start_server( Ok(()) }); + let db_path = DBPath(db_path); + // Get the address to bind to let bind_addr = (config.listen_address, config.port).into(); @@ -91,12 +96,14 @@ pub fn start_server( let service = move || { let log = server_log.clone(); let beacon_chain = server_bc.clone(); + let db_path = db_path.clone(); // Create a simple handler for the router, inject our stateful objects into the request. 
service_fn_ok(move |mut req| { req.extensions_mut().insert::(log.clone()); req.extensions_mut() .insert::>>(beacon_chain.clone()); + req.extensions_mut().insert::(db_path.clone()); let path = req.uri().path().to_string(); @@ -104,7 +111,7 @@ pub fn start_server( let result = match (req.method(), path.as_ref()) { (&Method::GET, "/beacon/state") => beacon::get_state::(req), (&Method::GET, "/beacon/state_root") => beacon::get_state_root::(req), - (&Method::GET, "/metrics") => metrics::get_prometheus(req), + (&Method::GET, "/metrics") => metrics::get_prometheus::(req), (&Method::GET, "/node/version") => node::get_version(req), (&Method::GET, "/node/genesis_time") => node::get_genesis_time::(req), _ => Err(ApiError::MethodNotAllowed(path.clone())), @@ -154,3 +161,14 @@ fn success_response(body: Body) -> Response { .body(body) .expect("We should always be able to make response from the success body.") } + +#[derive(Clone)] +pub struct DBPath(PathBuf); + +impl Deref for DBPath { + type Target = PathBuf; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index b0f5b8605..0cd700c44 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -1,12 +1,26 @@ -use crate::{success_response, ApiError, ApiResult}; +use crate::{success_response, ApiError, ApiResult, DBPath}; +use beacon_chain::{BeaconChain, BeaconChainTypes}; use hyper::{Body, Request}; use prometheus::{Encoder, TextEncoder}; +use std::sync::Arc; /// Returns the full set of Prometheus metrics for the Beacon Node application. 
-pub fn get_prometheus(_req: Request) -> ApiResult { +pub fn get_prometheus(req: Request) -> ApiResult { let mut buffer = vec![]; let encoder = TextEncoder::new(); + let beacon_chain = req + .extensions() + .get::>>() + .ok_or_else(|| ApiError::ServerError("Beacon chain extension missing".to_string()))?; + let db_path = req + .extensions() + .get::() + .ok_or_else(|| ApiError::ServerError("DBPath extension missing".to_string()))?; + + store::scrape_for_metrics(&db_path); + beacon_chain::scrape_for_metrics(&beacon_chain); + encoder.encode(&prometheus::gather(), &mut buffer).unwrap(); String::from_utf8(buffer) diff --git a/beacon_node/store/Cargo.toml b/beacon_node/store/Cargo.toml index 9607e8b8e..cd9711253 100644 --- a/beacon_node/store/Cargo.toml +++ b/beacon_node/store/Cargo.toml @@ -15,3 +15,5 @@ eth2_ssz = "0.1" eth2_ssz_derive = "0.1" tree_hash = "0.1" types = { path = "../../eth2/types" } +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 5b8d58320..9c0e3cbae 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -7,18 +7,22 @@ //! //! Provides a simple API for storing/retrieving all types that sometimes needs type-hints. See //! tests for implementation examples. +#[macro_use] +extern crate lazy_static; mod block_at_slot; mod errors; mod impls; mod leveldb_store; mod memory_store; +mod metrics; pub mod iter; pub use self::leveldb_store::LevelDB as DiskStore; pub use self::memory_store::MemoryStore; pub use errors::Error; +pub use metrics::scrape_for_metrics; pub use types::*; /// An object capable of storing and retrieving objects implementing `StoreItem`. 
diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs new file mode 100644 index 000000000..b6a055f10 --- /dev/null +++ b/beacon_node/store/src/metrics.rs @@ -0,0 +1,25 @@ +pub use lighthouse_metrics::{set_gauge, try_create_int_gauge, *}; + +use std::fs; +use std::path::PathBuf; + +lazy_static! { + pub static ref DISK_DB_SIZE: Result = + try_create_int_gauge("database_size", "Size of the on-disk database (bytes)"); +} + +/// Updates the global metrics registry with store-related information. +pub fn scrape_for_metrics(db_path: &PathBuf) { + let db_size = if let Ok(iter) = fs::read_dir(db_path) { + iter.filter_map(std::result::Result::ok) + .map(size_of_dir_entry) + .fold(0_u64, |sum, val| sum + val) + } else { + 0 + }; + set_gauge(&DISK_DB_SIZE, db_size as i64); +} + +fn size_of_dir_entry(dir: fs::DirEntry) -> u64 { + dir.metadata().map(|m| m.len()).unwrap_or(0) +} diff --git a/eth2/utils/lighthouse_metrics/src/lib.rs b/eth2/utils/lighthouse_metrics/src/lib.rs index e6e30f6bb..d55fcd3e2 100644 --- a/eth2/utils/lighthouse_metrics/src/lib.rs +++ b/eth2/utils/lighthouse_metrics/src/lib.rs @@ -1,6 +1,6 @@ use prometheus::{HistogramOpts, HistogramTimer, Opts}; -pub use prometheus::{Histogram, IntCounter, Result}; +pub use prometheus::{Histogram, IntCounter, IntGauge, Result}; pub fn try_create_int_counter(name: &str, help: &str) -> Result { let opts = Opts::new(name, help); @@ -9,6 +9,13 @@ pub fn try_create_int_counter(name: &str, help: &str) -> Result { Ok(counter) } +pub fn try_create_int_gauge(name: &str, help: &str) -> Result { + let opts = Opts::new(name, help); + let gauge = IntGauge::with_opts(opts)?; + prometheus::register(Box::new(gauge.clone()))?; + Ok(gauge) +} + pub fn try_create_histogram(name: &str, help: &str) -> Result { let opts = HistogramOpts::new(name, help); let histogram = Histogram::with_opts(opts)?; @@ -34,6 +41,12 @@ pub fn inc_counter(counter: &Result) { } } +pub fn set_gauge(gauge: &Result, value: i64) { + if let 
Ok(gauge) = gauge { + gauge.set(value); + } +} + pub fn observe(histogram: &Result, value: f64) { if let Ok(histogram) = histogram { histogram.observe(value); From 913ee4694eb4310b08def5feef4f111233b6c3e5 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 13:35:16 +1000 Subject: [PATCH 12/26] Prefix BeaconChain metrics with "beacon_" --- beacon_node/beacon_chain/src/metrics.rs | 94 ++++++++++++------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 6ed8218f0..227f1090f 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -7,45 +7,45 @@ lazy_static! { * Block Processing */ pub static ref BLOCK_PROCESSING_REQUESTS: Result = try_create_int_counter( - "block_processing_requests", + "beacon_block_processing_requests", "Count of blocks submitted for processing" ); pub static ref BLOCK_PROCESSING_SUCCESSES: Result = try_create_int_counter( - "block_processing_successes", + "beacon_block_processing_successes", "Count of blocks processed without error" ); pub static ref BLOCK_PROCESSING_TIMES: Result = try_create_histogram("block_processing_times", "Full runtime of block processing"); pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( - "block_processing_db_read_times", + "beacon_block_processing_db_read_times", "Time spent loading block and state from DB for block processing" ); pub static ref BLOCK_PROCESSING_CATCHUP_STATE: Result = try_create_histogram( - "block_processing_catch_up_state_times", + "beacon_block_processing_catch_up_state_times", "Time spent skipping slots on a state before processing a block." ); pub static ref BLOCK_PROCESSING_COMMITTEE: Result = try_create_histogram( - "block_processing_committee_building_times", + "beacon_block_processing_committee_building_times", "Time spent building/obtaining committees for block processing." 
); pub static ref BLOCK_PROCESSING_CORE: Result = try_create_histogram( - "block_processing_core_times", + "beacon_block_processing_core_times", "Time spent doing the core per_block_processing state processing." ); pub static ref BLOCK_PROCESSING_STATE_ROOT: Result = try_create_histogram( - "block_processing_state_root_times", + "beacon_block_processing_state_root_times", "Time spent calculating the state root when processing a block." ); pub static ref BLOCK_PROCESSING_DB_WRITE: Result = try_create_histogram( - "block_processing_db_write_times", + "beacon_block_processing_db_write_times", "Time spent writing a newly processed block and state to DB" ); pub static ref BLOCK_PROCESSING_FORK_CHOICE_REGISTER: Result = try_create_histogram( - "block_processing_fork_choice_register_times", + "beacon_block_processing_fork_choice_register_times", "Time spent registering the new block with fork choice (but not finding head)" ); pub static ref BLOCK_PROCESSING_FORK_CHOICE_FIND_HEAD: Result = try_create_histogram( - "block_processing_fork_choice_find_head_times", + "beacon_block_processing_fork_choice_find_head_times", "Time spent finding the new head after processing a new block" ); @@ -53,21 +53,21 @@ lazy_static! { * Block Production */ pub static ref BLOCK_PRODUCTION_REQUESTS: Result = try_create_int_counter( - "block_production_requests", + "beacon_block_production_requests", "Count of all block production requests" ); pub static ref BLOCK_PRODUCTION_SUCCESSES: Result = try_create_int_counter( - "block_production_successes", + "beacon_block_production_successes", "Count of blocks successfully produced." 
); pub static ref BLOCK_PRODUCTION_TIMES: Result = - try_create_histogram("block_production_times", "Full runtime of block production"); + try_create_histogram("beacon_block_production_times", "Full runtime of block production"); /* * Block Statistics */ pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = try_create_histogram( - "operations_per_block_attestation", + "beacon_operations_per_block_attestation", "Number of attestations in a block" ); @@ -75,15 +75,15 @@ lazy_static! { * Attestation Processing */ pub static ref ATTESTATION_PROCESSING_REQUESTS: Result = try_create_int_counter( - "attestation_processing_requests", + "beacon_attestation_processing_requests", "Count of all attestations submitted for processing" ); pub static ref ATTESTATION_PROCESSING_SUCCESSES: Result = try_create_int_counter( - "attestation_processing_successes", + "beacon_attestation_processing_successes", "total_attestation_processing_successes" ); pub static ref ATTESTATION_PROCESSING_TIMES: Result = try_create_histogram( - "attestation_processing_times", + "beacon_attestation_processing_times", "Full runtime of attestation processing" ); @@ -91,15 +91,15 @@ lazy_static! { * Attestation Production */ pub static ref ATTESTATION_PRODUCTION_REQUESTS: Result = try_create_int_counter( - "attestation_production_requests", + "beacon_attestation_production_requests", "Count of all attestation production requests" ); pub static ref ATTESTATION_PRODUCTION_SUCCESSES: Result = try_create_int_counter( - "attestation_production_successes", + "beacon_attestation_production_successes", "Count of attestations processed without error" ); pub static ref ATTESTATION_PRODUCTION_TIMES: Result = try_create_histogram( - "attestation_production_times", + "beacon_attestation_production_times", "Full runtime of attestation production" ); @@ -107,31 +107,31 @@ lazy_static! 
{ * Fork Choice */ pub static ref FORK_CHOICE_REQUESTS: Result = try_create_int_counter( - "fork_choice_requests", + "beacon_fork_choice_requests", "Count of occasions where fork choice has tried to find a head" ); pub static ref FORK_CHOICE_ERRORS: Result = try_create_int_counter( - "fork_choice_errors", + "beacon_fork_choice_errors", "Count of occasions where fork choice has returned an error when trying to find a head" ); pub static ref FORK_CHOICE_CHANGED_HEAD: Result = try_create_int_counter( - "fork_choice_changed_head", + "beacon_fork_choice_changed_head", "Count of occasions fork choice has found a new head" ); pub static ref FORK_CHOICE_REORG_COUNT: Result = try_create_int_counter( - "fork_choice_reorg_count", + "beacon_fork_choice_reorg_count", "Count of occasions fork choice has switched to a different chain" ); pub static ref FORK_CHOICE_TIMES: Result = - try_create_histogram("fork_choice_time", "Full runtime of fork choice"); + try_create_histogram("beacon_fork_choice_time", "Full runtime of fork choice"); pub static ref FORK_CHOICE_FIND_HEAD_TIMES: Result = - try_create_histogram("fork_choice_find_head_time", "Full runtime of fork choice find_head function"); + try_create_histogram("beacon_fork_choice_find_head_time", "Full runtime of fork choice find_head function"); pub static ref FORK_CHOICE_PROCESS_BLOCK_TIMES: Result = try_create_histogram( - "fork_choice_process_block_time", + "beacon_fork_choice_process_block_time", "Time taken to add a block and all attestations to fork choice" ); pub static ref FORK_CHOICE_PROCESS_ATTESTATION_TIMES: Result = try_create_histogram( - "fork_choice_process_attestation_time", + "beacon_fork_choice_process_attestation_time", "Time taken to add an attestation to fork choice" ); @@ -139,7 +139,7 @@ lazy_static! 
{ * Persisting BeaconChain to disk */ pub static ref PERSIST_CHAIN: Result = - try_create_histogram("persist_chain", "Time taken to update the canonical head"); + try_create_histogram("beacon_persist_chain", "Time taken to persist the beacon chain to disk"); } // Lazy-static is split so we don't reach the crate-level recursion limit. @@ -148,45 +148,45 @@ lazy_static! { /* * Slot Clock */ pub static ref PRESENT_SLOT: Result = - try_create_int_gauge("present_slot", "The present slot, according to system time"); + try_create_int_gauge("beacon_present_slot", "The present slot, according to system time"); pub static ref PRESENT_EPOCH: Result = - try_create_int_gauge("present_epoch", "The present epoch, according to system time"); + try_create_int_gauge("beacon_present_epoch", "The present epoch, according to system time"); /* * Chain Head */ pub static ref UPDATE_HEAD_TIMES: Result = - try_create_histogram("update_head_times", "Time taken to update the canonical head"); + try_create_histogram("beacon_update_head_times", "Time taken to update the canonical head"); pub static ref HEAD_STATE_SLOT: Result = - try_create_int_gauge("head_state_slot", "Slot of the block at the head of the chain"); + try_create_int_gauge("beacon_head_state_slot", "Slot of the block at the head of the chain"); pub static ref HEAD_STATE_ROOT: Result = - try_create_int_gauge("head_state_root", "Root of the block at the head of the chain"); + try_create_int_gauge("beacon_head_state_root", "Root of the block at the head of the chain"); pub static ref HEAD_STATE_LATEST_BLOCK_SLOT: Result = - try_create_int_gauge("head_state_latest_block_slot", "Latest block slot at the head of the chain"); + try_create_int_gauge("beacon_head_state_latest_block_slot", "Latest block slot at the head of the chain"); pub static ref HEAD_STATE_CURRENT_JUSTIFIED_ROOT: Result = - try_create_int_gauge("head_state_current_justified_root", "Current justified root at the head of the chain"); + 
try_create_int_gauge("beacon_head_state_current_justified_root", "Current justified root at the head of the chain"); pub static ref HEAD_STATE_CURRENT_JUSTIFIED_EPOCH: Result = - try_create_int_gauge("head_state_current_justified_epoch", "Current justified epoch at the head of the chain"); + try_create_int_gauge("beacon_head_state_current_justified_epoch", "Current justified epoch at the head of the chain"); pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT: Result = - try_create_int_gauge("head_state_previous_justified_root", "Previous justified root at the head of the chain"); + try_create_int_gauge("beacon_head_state_previous_justified_root", "Previous justified root at the head of the chain"); pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH: Result = - try_create_int_gauge("head_state_previous_justified_epoch", "Previous justified epoch at the head of the chain"); + try_create_int_gauge("beacon_head_state_previous_justified_epoch", "Previous justified epoch at the head of the chain"); pub static ref HEAD_STATE_FINALIZED_ROOT: Result = - try_create_int_gauge("head_state_finalized_root", "Finalized root at the head of the chain"); + try_create_int_gauge("beacon_head_state_finalized_root", "Finalized root at the head of the chain"); pub static ref HEAD_STATE_FINALIZED_EPOCH: Result = - try_create_int_gauge("head_state_finalized_epoch", "Finalized epoch at the head of the chain"); + try_create_int_gauge("beacon_head_state_finalized_epoch", "Finalized epoch at the head of the chain"); pub static ref HEAD_STATE_TOTAL_VALIDATORS: Result = - try_create_int_gauge("head_state_total_validators", "Count of validators at the head of the chain"); + try_create_int_gauge("beacon_head_state_total_validators", "Count of validators at the head of the chain"); pub static ref HEAD_STATE_ACTIVE_VALIDATORS: Result = - try_create_int_gauge("head_state_active_validators", "Count of active validators at the head of the chain"); + 
try_create_int_gauge("beacon_head_state_active_validators", "Count of active validators at the head of the chain"); pub static ref HEAD_STATE_VALIDATOR_BALANCES: Result = - try_create_int_gauge("head_state_validator_balances", "Sum of all validator balances at the head of the chain"); + try_create_int_gauge("beacon_head_state_validator_balances", "Sum of all validator balances at the head of the chain"); pub static ref HEAD_STATE_SLASHED_VALIDATORS: Result = - try_create_int_gauge("head_state_slashed_validators", "Count of all slashed validators at the head of the chain"); + try_create_int_gauge("beacon_head_state_slashed_validators", "Count of all slashed validators at the head of the chain"); pub static ref HEAD_STATE_WITHDRAWN_VALIDATORS: Result = - try_create_int_gauge("head_state_withdrawn_validators", "Sum of all validator balances at the head of the chain"); + try_create_int_gauge("beacon_head_state_withdrawn_validators", "Count of all withdrawable validators at the head of the chain"); pub static ref HEAD_STATE_ETH1_DEPOSIT_INDEX: Result = - try_create_int_gauge("head_state_eth1_deposit_index", "Eth1 deposit index at the head of the chain"); + try_create_int_gauge("beacon_head_state_eth1_deposit_index", "Eth1 deposit index at the head of the chain"); } /// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot, From 0b4a8893a4a94826b723e39b29ea8aaf64bb8912 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 13:49:09 +1000 Subject: [PATCH 13/26] Add more store metrics --- beacon_node/store/src/leveldb_store.rs | 22 ++++++++++++++++++-- beacon_node/store/src/metrics.rs | 26 +++++++++++++++++++++++- eth2/utils/lighthouse_metrics/src/lib.rs | 6 ++++++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/beacon_node/store/src/leveldb_store.rs b/beacon_node/store/src/leveldb_store.rs index 699861e3a..a085d845a 100644 --- a/beacon_node/store/src/leveldb_store.rs +++ b/beacon_node/store/src/leveldb_store.rs @@ 
-1,4 +1,5 @@ use super::*; +use crate::metrics; use db_key::Key; use leveldb::database::kv::KV; use leveldb::database::Database; @@ -62,15 +63,27 @@ impl Store for LevelDB { fn get_bytes(&self, col: &str, key: &[u8]) -> Result>, Error> { let column_key = Self::get_key_for_col(col, key); - self.db + metrics::inc_counter(&metrics::DISK_DB_READ_COUNT); + + let result = self + .db .get(self.read_options(), column_key) - .map_err(Into::into) + .map_err(Into::into); + + if let Ok(Some(bytes)) = &result { + metrics::inc_counter_by(&metrics::DISK_DB_READ_BYTES, bytes.len() as i64) + } + + result } /// Store some `value` in `column`, indexed with `key`. fn put_bytes(&self, col: &str, key: &[u8], val: &[u8]) -> Result<(), Error> { let column_key = Self::get_key_for_col(col, key); + metrics::inc_counter(&metrics::DISK_DB_WRITE_COUNT); + metrics::inc_counter_by(&metrics::DISK_DB_WRITE_BYTES, val.len() as i64); + self.db .put(self.write_options(), column_key, val) .map_err(Into::into) @@ -80,6 +93,8 @@ impl Store for LevelDB { fn key_exists(&self, col: &str, key: &[u8]) -> Result { let column_key = Self::get_key_for_col(col, key); + metrics::inc_counter(&metrics::DISK_DB_EXISTS_COUNT); + self.db .get(self.read_options(), column_key) .map_err(Into::into) @@ -89,6 +104,9 @@ impl Store for LevelDB { /// Removes `key` from `column`. fn key_delete(&self, col: &str, key: &[u8]) -> Result<(), Error> { let column_key = Self::get_key_for_col(col, key); + + metrics::inc_counter(&metrics::DISK_DB_DELETE_COUNT); + self.db .delete(self.write_options(), column_key) .map_err(Into::into) diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs index b6a055f10..430e9c38e 100644 --- a/beacon_node/store/src/metrics.rs +++ b/beacon_node/store/src/metrics.rs @@ -5,7 +5,31 @@ use std::path::PathBuf; lazy_static! 
{ pub static ref DISK_DB_SIZE: Result = - try_create_int_gauge("database_size", "Size of the on-disk database (bytes)"); + try_create_int_gauge("store_disk_db_size", "Size of the on-disk database (bytes)"); + pub static ref DISK_DB_WRITE_BYTES: Result = try_create_int_counter( + "store_disk_db_write_bytes", + "Number of bytes attempted to be written to the on-disk DB" + ); + pub static ref DISK_DB_READ_BYTES: Result = try_create_int_counter( + "store_disk_db_read_bytes", + "Number of bytes read from the on-disk DB" + ); + pub static ref DISK_DB_READ_COUNT: Result = try_create_int_counter( + "store_disk_db_read_count", + "Total number of reads to the on-disk DB" + ); + pub static ref DISK_DB_WRITE_COUNT: Result = try_create_int_counter( + "store_disk_db_write_count", + "Total number of writes to the on-disk DB" + ); + pub static ref DISK_DB_EXISTS_COUNT: Result = try_create_int_counter( + "store_disk_db_exists_count", + "Total number of checks if a key is in the on-disk DB" + ); + pub static ref DISK_DB_DELETE_COUNT: Result = try_create_int_counter( + "store_disk_db_delete_count", + "Total number of deletions from the on-disk DB" + ); } /// Updates the global metrics registry with store-related information. 
diff --git a/eth2/utils/lighthouse_metrics/src/lib.rs b/eth2/utils/lighthouse_metrics/src/lib.rs index d55fcd3e2..a8656d017 100644 --- a/eth2/utils/lighthouse_metrics/src/lib.rs +++ b/eth2/utils/lighthouse_metrics/src/lib.rs @@ -41,6 +41,12 @@ pub fn inc_counter(counter: &Result) { } } +pub fn inc_counter_by(counter: &Result, value: i64) { + if let Ok(counter) = counter { + counter.inc_by(value); + } +} + pub fn set_gauge(gauge: &Result, value: i64) { if let Ok(gauge) = gauge { gauge.set(value); From cac0e5c83284fb05cf9d465cb8e2fc8dc0f3e4aa Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 14:16:20 +1000 Subject: [PATCH 14/26] Add basic metrics to libp2p --- beacon_node/eth2-libp2p/Cargo.toml | 2 ++ beacon_node/eth2-libp2p/src/discovery.rs | 4 ++++ beacon_node/eth2-libp2p/src/lib.rs | 4 ++++ beacon_node/eth2-libp2p/src/metrics.rs | 16 ++++++++++++++++ 4 files changed, 26 insertions(+) create mode 100644 beacon_node/eth2-libp2p/src/metrics.rs diff --git a/beacon_node/eth2-libp2p/Cargo.toml b/beacon_node/eth2-libp2p/Cargo.toml index 794b09712..006b895a1 100644 --- a/beacon_node/eth2-libp2p/Cargo.toml +++ b/beacon_node/eth2-libp2p/Cargo.toml @@ -26,3 +26,5 @@ smallvec = "0.6.10" fnv = "1.0.6" unsigned-varint = "0.2.2" bytes = "0.4.12" +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } diff --git a/beacon_node/eth2-libp2p/src/discovery.rs b/beacon_node/eth2-libp2p/src/discovery.rs index 4c1794945..d9f2f7465 100644 --- a/beacon_node/eth2-libp2p/src/discovery.rs +++ b/beacon_node/eth2-libp2p/src/discovery.rs @@ -1,3 +1,4 @@ +use crate::metrics; use crate::{error, NetworkConfig}; /// This manages the discovery and management of peers. 
/// @@ -158,10 +159,12 @@ where } fn inject_connected(&mut self, peer_id: PeerId, _endpoint: ConnectedPoint) { + metrics::inc_counter(&metrics::PEER_CONNECT_COUNT); self.connected_peers.insert(peer_id); } fn inject_disconnected(&mut self, peer_id: &PeerId, _endpoint: ConnectedPoint) { + metrics::inc_counter(&metrics::PEER_DISCONNECT_COUNT); self.connected_peers.remove(peer_id); } @@ -217,6 +220,7 @@ where } Discv5Event::SocketUpdated(socket) => { info!(self.log, "Address updated"; "IP" => format!("{}",socket.ip())); + metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT); let mut address = Multiaddr::from(socket.ip()); address.push(Protocol::Tcp(self.tcp_port)); let enr = self.discovery.local_enr(); diff --git a/beacon_node/eth2-libp2p/src/lib.rs b/beacon_node/eth2-libp2p/src/lib.rs index 54a4f2a99..33d5ba9ed 100644 --- a/beacon_node/eth2-libp2p/src/lib.rs +++ b/beacon_node/eth2-libp2p/src/lib.rs @@ -2,10 +2,14 @@ /// all required libp2p functionality. /// /// This crate builds and manages the libp2p services required by the beacon node. +#[macro_use] +extern crate lazy_static; + pub mod behaviour; mod config; mod discovery; pub mod error; +mod metrics; pub mod rpc; mod service; diff --git a/beacon_node/eth2-libp2p/src/metrics.rs b/beacon_node/eth2-libp2p/src/metrics.rs new file mode 100644 index 000000000..a47037669 --- /dev/null +++ b/beacon_node/eth2-libp2p/src/metrics.rs @@ -0,0 +1,16 @@ +pub use lighthouse_metrics::*; + +lazy_static! 
{ + pub static ref ADDRESS_UPDATE_COUNT: Result = try_create_int_counter( + "libp2p_address_update_count", + "Count of libp2p socket update events (when our view of our IP address has changed)" + ); + pub static ref PEER_CONNECT_COUNT: Result = try_create_int_counter( + "libp2p_peer_connect_count", + "Count of libp2p peer connect events (not the current number of connected peers)" + ); + pub static ref PEER_DISCONNECT_COUNT: Result = try_create_int_counter( + "libp2p_peer_disconnect_count", + "Count of libp2p peer disconnect events" + ); +} From af334b2cf0a6278c576b23d93d1748fdb4a51960 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 14:30:46 +1000 Subject: [PATCH 15/26] Add metrics to HTTP server --- beacon_node/rest_api/Cargo.toml | 2 ++ beacon_node/rest_api/src/lib.rs | 17 +++++++++++++---- beacon_node/rest_api/src/metrics.rs | 21 +++++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/beacon_node/rest_api/Cargo.toml b/beacon_node/rest_api/Cargo.toml index 821d6c0ea..100e680de 100644 --- a/beacon_node/rest_api/Cargo.toml +++ b/beacon_node/rest_api/Cargo.toml @@ -24,3 +24,5 @@ futures = "0.1" exit-future = "0.1.3" tokio = "0.1.17" url = "2.0" +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } diff --git a/beacon_node/rest_api/src/lib.rs b/beacon_node/rest_api/src/lib.rs index fea67618b..57019deea 100644 --- a/beacon_node/rest_api/src/lib.rs +++ b/beacon_node/rest_api/src/lib.rs @@ -1,5 +1,6 @@ -extern crate futures; -extern crate hyper; +#[macro_use] +extern crate lazy_static; + mod beacon; mod config; mod helpers; @@ -100,6 +101,9 @@ pub fn start_server( // Create a simple handler for the router, inject our stateful objects into the request.
service_fn_ok(move |mut req| { + metrics::inc_counter(&metrics::REQUEST_COUNT); + let timer = metrics::start_timer(&metrics::REQUEST_RESPONSE_TIME); + req.extensions_mut().insert::(log.clone()); req.extensions_mut() .insert::>>(beacon_chain.clone()); @@ -117,9 +121,10 @@ pub fn start_server( _ => Err(ApiError::MethodNotAllowed(path.clone())), }; - match result { + let response = match result { // Return the `hyper::Response`. Ok(response) => { + metrics::inc_counter(&metrics::SUCCESS_COUNT); slog::debug!(log, "Request successful: {:?}", path); response } @@ -128,7 +133,11 @@ pub fn start_server( slog::debug!(log, "Request failure: {:?}", path); e.into() } - } + }; + + metrics::stop_timer(timer); + + response }) }; diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index 0cd700c44..c0db810b6 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -4,7 +4,28 @@ use hyper::{Body, Request}; use prometheus::{Encoder, TextEncoder}; use std::sync::Arc; +pub use lighthouse_metrics::*; + +lazy_static! { + pub static ref REQUEST_RESPONSE_TIME: Result = try_create_histogram( + "http_server_request_response_time", + "Time taken to build a response to a HTTP request" + ); + pub static ref REQUEST_COUNT: Result = try_create_int_counter( + "http_server_request_count", + "Total count of HTTP requests received" + ); + pub static ref SUCCESS_COUNT: Result = try_create_int_counter( + "http_server_success_count", + "Total count of HTTP 200 responses sent" + ); +} + /// Returns the full set of Prometheus metrics for the Beacon Node application. +/// +/// # Note +/// +/// This is a HTTP handler method. 
pub fn get_prometheus(req: Request) -> ApiResult { let mut buffer = vec![]; let encoder = TextEncoder::new(); From 6a1e5f6d26c4dffd126faaedd970b4d8446a1ce3 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 15:19:39 +1000 Subject: [PATCH 16/26] Remove old `http_server` crate --- Cargo.toml | 1 - beacon_node/client/Cargo.toml | 1 - beacon_node/client/src/config.rs | 4 - beacon_node/client/src/lib.rs | 22 --- beacon_node/http_server/Cargo.toml | 23 --- beacon_node/http_server/src/api.rs | 71 -------- beacon_node/http_server/src/key.rs | 33 ---- beacon_node/http_server/src/lib.rs | 145 ----------------- beacon_node/http_server/src/metrics.rs | 72 -------- .../http_server/src/metrics/local_metrics.rs | 154 ------------------ beacon_node/rest_api/src/config.rs | 2 +- docs/config_examples/beacon-node.toml | 10 +- 12 files changed, 2 insertions(+), 536 deletions(-) delete mode 100644 beacon_node/http_server/Cargo.toml delete mode 100644 beacon_node/http_server/src/api.rs delete mode 100644 beacon_node/http_server/src/key.rs delete mode 100644 beacon_node/http_server/src/lib.rs delete mode 100644 beacon_node/http_server/src/metrics.rs delete mode 100644 beacon_node/http_server/src/metrics/local_metrics.rs diff --git a/Cargo.toml b/Cargo.toml index 9b7b87a0d..f087539e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,6 @@ members = [ "beacon_node", "beacon_node/store", "beacon_node/client", - "beacon_node/http_server", "beacon_node/rest_api", "beacon_node/network", "beacon_node/eth2-libp2p", diff --git a/beacon_node/client/Cargo.toml b/beacon_node/client/Cargo.toml index 8c72fa417..b13f175a9 100644 --- a/beacon_node/client/Cargo.toml +++ b/beacon_node/client/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [dependencies] beacon_chain = { path = "../beacon_chain" } network = { path = "../network" } -http_server = { path = "../http_server" } rpc = { path = "../rpc" } rest_api = { path = "../rest_api" } prometheus = "^0.6" diff --git 
a/beacon_node/client/src/config.rs b/beacon_node/client/src/config.rs index ee62b6281..fcc2cc7da 100644 --- a/beacon_node/client/src/config.rs +++ b/beacon_node/client/src/config.rs @@ -1,6 +1,5 @@ use crate::Eth2Config; use clap::ArgMatches; -use http_server::HttpServerConfig; use network::NetworkConfig; use serde_derive::{Deserialize, Serialize}; use slog::{info, o, Drain}; @@ -25,7 +24,6 @@ pub struct Config { pub genesis_state: GenesisState, pub network: network::NetworkConfig, pub rpc: rpc::RPCConfig, - pub http: HttpServerConfig, pub rest_api: rest_api::ApiConfig, } @@ -59,7 +57,6 @@ impl Default for Config { db_name: "chain_db".to_string(), network: NetworkConfig::new(), rpc: rpc::RPCConfig::default(), - http: HttpServerConfig::default(), rest_api: rest_api::ApiConfig::default(), spec_constants: TESTNET_SPEC_CONSTANTS.into(), genesis_state: GenesisState::RecentGenesis { @@ -143,7 +140,6 @@ impl Config { self.network.apply_cli_args(args)?; self.rpc.apply_cli_args(args)?; - self.http.apply_cli_args(args)?; self.rest_api.apply_cli_args(args)?; if let Some(log_file) = args.value_of("logfile") { diff --git a/beacon_node/client/src/lib.rs b/beacon_node/client/src/lib.rs index c74787f60..5c37ac3e9 100644 --- a/beacon_node/client/src/lib.rs +++ b/beacon_node/client/src/lib.rs @@ -10,7 +10,6 @@ use beacon_chain::BeaconChain; use exit_future::Signal; use futures::{future::Future, Stream}; use network::Service as NetworkService; -use prometheus::Registry; use slog::{error, info, o}; use slot_clock::SlotClock; use std::marker::PhantomData; @@ -36,8 +35,6 @@ pub struct Client { pub network: Arc>, /// Signal to terminate the RPC server. pub rpc_exit_signal: Option, - /// Signal to terminate the HTTP server. - pub http_exit_signal: Option, /// Signal to terminate the slot timer. 
pub slot_timer_exit_signal: Option, /// Signal to terminate the API @@ -60,7 +57,6 @@ where log: slog::Logger, executor: &TaskExecutor, ) -> error::Result { - let metrics_registry = Registry::new(); let store = Arc::new(store); let seconds_per_slot = eth2_config.spec.seconds_per_slot; @@ -119,23 +115,6 @@ where None }; - // Start the `http_server` service. - // - // Note: presently we are ignoring the config and _always_ starting a HTTP server. - let http_exit_signal = if client_config.http.enabled { - Some(http_server::start_service( - &client_config.http, - executor, - network_send, - beacon_chain.clone(), - client_config.db_path().expect("unable to read datadir"), - metrics_registry, - &log, - )) - } else { - None - }; - // Start the `rest_api` service let api_exit_signal = if client_config.rest_api.enabled { match rest_api::start_server( @@ -184,7 +163,6 @@ where Ok(Client { _client_config: client_config, beacon_chain, - http_exit_signal, rpc_exit_signal, slot_timer_exit_signal: Some(slot_timer_exit_signal), api_exit_signal, diff --git a/beacon_node/http_server/Cargo.toml b/beacon_node/http_server/Cargo.toml deleted file mode 100644 index e87ff2997..000000000 --- a/beacon_node/http_server/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "http_server" -version = "0.1.0" -authors = ["Paul Hauner "] -edition = "2018" - -[dependencies] -beacon_chain = { path = "../beacon_chain" } -iron = "^0.6" -router = "^0.6" -network = { path = "../network" } -types = { path = "../../eth2/types" } -slot_clock = { path = "../../eth2/utils/slot_clock" } -persistent = "^0.4" -prometheus = { version = "^0.6", features = ["process"] } -clap = "2.32.0" -futures = "0.1.23" -serde = "1.0" -serde_derive = "1.0" -serde_json = "1.0" -slog = { version = "^2.2.3" , features = ["max_level_trace"] } -tokio = "0.1.17" -exit-future = "0.1.4" diff --git a/beacon_node/http_server/src/api.rs b/beacon_node/http_server/src/api.rs deleted file mode 100644 index 8cb023b02..000000000 --- 
a/beacon_node/http_server/src/api.rs +++ /dev/null @@ -1,71 +0,0 @@ -use crate::{key::BeaconChainKey, map_persistent_err_to_500}; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use iron::prelude::*; -use iron::{ - headers::{CacheControl, CacheDirective, ContentType}, - status::Status, - AfterMiddleware, Handler, IronResult, Request, Response, -}; -use persistent::Read; -use router::Router; -use serde_json::json; -use std::sync::Arc; - -/// Yields a handler for the HTTP API. -pub fn build_handler( - beacon_chain: Arc>, -) -> impl Handler { - let mut router = Router::new(); - - router.get("/node/fork", handle_fork::, "fork"); - - let mut chain = Chain::new(router); - - // Insert `BeaconChain` so it may be accessed in a request. - chain.link(Read::>::both(beacon_chain.clone())); - // Set the content-type headers. - chain.link_after(SetJsonContentType); - // Set the cache headers. - chain.link_after(SetCacheDirectives); - - chain -} - -/// Sets the `cache-control` headers on _all_ responses, unless they are already set. -struct SetCacheDirectives; -impl AfterMiddleware for SetCacheDirectives { - fn after(&self, _req: &mut Request, mut resp: Response) -> IronResult { - // This is run for every requests, AFTER all handlers have been executed - if resp.headers.get::() == None { - resp.headers.set(CacheControl(vec![ - CacheDirective::NoCache, - CacheDirective::NoStore, - ])); - } - Ok(resp) - } -} - -/// Sets the `content-type` headers on _all_ responses, unless they are already set. 
-struct SetJsonContentType; -impl AfterMiddleware for SetJsonContentType { - fn after(&self, _req: &mut Request, mut resp: Response) -> IronResult { - if resp.headers.get::() == None { - resp.headers.set(ContentType::json()); - } - Ok(resp) - } -} - -fn handle_fork(req: &mut Request) -> IronResult { - let beacon_chain = req - .get::>>() - .map_err(map_persistent_err_to_500)?; - - let response = json!({ - "fork": beacon_chain.head().beacon_state.fork, - "network_id": beacon_chain.spec.network_id - }); - - Ok(Response::with((Status::Ok, response.to_string()))) -} diff --git a/beacon_node/http_server/src/key.rs b/beacon_node/http_server/src/key.rs deleted file mode 100644 index a69da6747..000000000 --- a/beacon_node/http_server/src/key.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::metrics::LocalMetrics; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use iron::typemap::Key; -use prometheus::Registry; -use std::marker::PhantomData; -use std::path::PathBuf; -use std::sync::Arc; - -pub struct BeaconChainKey { - _phantom: PhantomData, -} - -impl Key for BeaconChainKey { - type Value = Arc>; -} - -pub struct MetricsRegistryKey; - -impl Key for MetricsRegistryKey { - type Value = Registry; -} - -pub struct LocalMetricsKey; - -impl Key for LocalMetricsKey { - type Value = LocalMetrics; -} - -pub struct DBPathKey; - -impl Key for DBPathKey { - type Value = PathBuf; -} diff --git a/beacon_node/http_server/src/lib.rs b/beacon_node/http_server/src/lib.rs deleted file mode 100644 index f1d006a5b..000000000 --- a/beacon_node/http_server/src/lib.rs +++ /dev/null @@ -1,145 +0,0 @@ -mod api; -mod key; -mod metrics; - -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use clap::ArgMatches; -use futures::Future; -use iron::prelude::*; -use network::NetworkMessage; -use prometheus::Registry; -use router::Router; -use serde_derive::{Deserialize, Serialize}; -use slog::{info, o, warn}; -use std::path::PathBuf; -use std::sync::Arc; -use tokio::runtime::TaskExecutor; -use 
tokio::sync::mpsc; - -#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] -pub struct HttpServerConfig { - pub enabled: bool, - pub listen_address: String, - pub listen_port: String, -} - -impl Default for HttpServerConfig { - fn default() -> Self { - Self { - enabled: false, - listen_address: "127.0.0.1".to_string(), - listen_port: "5052".to_string(), - } - } -} - -impl HttpServerConfig { - pub fn apply_cli_args(&mut self, args: &ArgMatches) -> Result<(), &'static str> { - if args.is_present("http") { - self.enabled = true; - } - - if let Some(listen_address) = args.value_of("http-address") { - self.listen_address = listen_address.to_string(); - } - - if let Some(listen_port) = args.value_of("http-port") { - self.listen_port = listen_port.to_string(); - } - - Ok(()) - } -} - -/// Build the `iron` HTTP server, defining the core routes. -pub fn create_iron_http_server( - beacon_chain: Arc>, - db_path: PathBuf, - metrics_registry: Registry, -) -> Iron { - let mut router = Router::new(); - - // A `GET` request to `/metrics` is handled by the `metrics` module. - router.get( - "/metrics", - metrics::build_handler(beacon_chain.clone(), db_path, metrics_registry), - "metrics", - ); - - // Any request to all other endpoints is handled by the `api` module. - router.any("/*", api::build_handler(beacon_chain.clone()), "api"); - - Iron::new(router) -} - -/// Start the HTTP service on the tokio `TaskExecutor`. -pub fn start_service( - config: &HttpServerConfig, - executor: &TaskExecutor, - _network_chan: mpsc::UnboundedSender, - beacon_chain: Arc>, - db_path: PathBuf, - metrics_registry: Registry, - log: &slog::Logger, -) -> exit_future::Signal { - let log = log.new(o!("Service"=>"HTTP")); - - // Create: - // - `shutdown_trigger` a one-shot to shut down this service. - // - `wait_for_shutdown` a future that will wait until someone calls shutdown. - let (shutdown_trigger, wait_for_shutdown) = exit_future::signal(); - - // Create an `iron` http, without starting it yet. 
- let iron = create_iron_http_server(beacon_chain, db_path, metrics_registry); - - // Create a HTTP server future. - // - // 1. Start the HTTP server - // 2. Build an exit future that will shutdown the server when requested. - // 3. Return the exit future, so the caller may shutdown the service when desired. - let http_service = { - let listen_address = format!("{}:{}", config.listen_address, config.listen_port); - // Start the HTTP server - let server_start_result = iron.http(listen_address.clone()); - - if server_start_result.is_ok() { - info!(log, "HTTP server running on {}", listen_address); - } else { - warn!(log, "HTTP server failed to start on {}", listen_address); - } - - // Build a future that will shutdown the HTTP server when the `shutdown_trigger` is - // triggered. - wait_for_shutdown.and_then(move |_| { - info!(log, "HTTP server shutting down"); - - if let Ok(mut server) = server_start_result { - // According to the documentation, `server.close()` "doesn't work" and the server - // keeps listening. - // - // It is being called anyway, because it seems like the right thing to do. If you - // know this has negative side-effects, please create an issue to discuss. - // - // See: https://docs.rs/iron/0.6.0/iron/struct.Listening.html#impl - match server.close() { - _ => (), - }; - } - info!(log, "HTTP server shutdown complete."); - Ok(()) - }) - }; - - // Attach the HTTP server to the executor. - executor.spawn(http_service); - - shutdown_trigger -} - -/// Helper function for mapping a failure to read state to a 500 server error. 
-fn map_persistent_err_to_500(e: persistent::PersistentError) -> iron::error::IronError { - iron::error::IronError { - error: Box::new(e), - response: iron::Response::with(iron::status::Status::InternalServerError), - } -} diff --git a/beacon_node/http_server/src/metrics.rs b/beacon_node/http_server/src/metrics.rs deleted file mode 100644 index 1b1ed1f3d..000000000 --- a/beacon_node/http_server/src/metrics.rs +++ /dev/null @@ -1,72 +0,0 @@ -use crate::{ - key::{BeaconChainKey, DBPathKey, LocalMetricsKey, MetricsRegistryKey}, - map_persistent_err_to_500, -}; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use iron::prelude::*; -use iron::{status::Status, Handler, IronResult, Request, Response}; -use persistent::Read; -use prometheus::{Encoder, Registry, TextEncoder}; -use std::path::PathBuf; -use std::sync::Arc; - -pub use local_metrics::LocalMetrics; - -mod local_metrics; - -/// Yields a handler for the metrics endpoint. -pub fn build_handler( - beacon_chain: Arc>, - db_path: PathBuf, - metrics_registry: Registry, -) -> impl Handler { - let mut chain = Chain::new(handle_metrics::); - - let local_metrics = LocalMetrics::new().unwrap(); - local_metrics.register(&metrics_registry).unwrap(); - - chain.link(Read::>::both(beacon_chain)); - chain.link(Read::::both(metrics_registry)); - chain.link(Read::::both(local_metrics)); - chain.link(Read::::both(db_path)); - - chain -} - -/// Handle a request for Prometheus metrics. -/// -/// Returns a text string containing all metrics. -fn handle_metrics(req: &mut Request) -> IronResult { - let beacon_chain = req - .get::>>() - .map_err(map_persistent_err_to_500)?; - - let r = req - .get::>() - .map_err(map_persistent_err_to_500)?; - - let local_metrics = req - .get::>() - .map_err(map_persistent_err_to_500)?; - - let db_path = req - .get::>() - .map_err(map_persistent_err_to_500)?; - - // Update metrics that are calculated on each scrape. 
- local_metrics.update(&beacon_chain, &db_path); - - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - - // Gather `DEFAULT_REGISTRY` metrics. - encoder.encode(&prometheus::gather(), &mut buffer).unwrap(); - - // Gather metrics from our registry. - let metric_families = r.gather(); - encoder.encode(&metric_families, &mut buffer).unwrap(); - - let prom_string = String::from_utf8(buffer).unwrap(); - - Ok(Response::with((Status::Ok, prom_string))) -} diff --git a/beacon_node/http_server/src/metrics/local_metrics.rs b/beacon_node/http_server/src/metrics/local_metrics.rs deleted file mode 100644 index b342cca81..000000000 --- a/beacon_node/http_server/src/metrics/local_metrics.rs +++ /dev/null @@ -1,154 +0,0 @@ -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use prometheus::{IntGauge, Opts, Registry}; -use slot_clock::SlotClock; -use std::fs; -use std::path::PathBuf; -use types::{EthSpec, Slot}; - -// If set to `true` will iterate and sum the balances of all validators in the state for each -// scrape. -const SHOULD_SUM_VALIDATOR_BALANCES: bool = true; - -pub struct LocalMetrics { - present_slot: IntGauge, - present_epoch: IntGauge, - best_slot: IntGauge, - best_beacon_block_root: IntGauge, - justified_beacon_block_root: IntGauge, - finalized_beacon_block_root: IntGauge, - validator_count: IntGauge, - justified_epoch: IntGauge, - finalized_epoch: IntGauge, - validator_balances_sum: IntGauge, - database_size: IntGauge, -} - -impl LocalMetrics { - /// Create a new instance. - pub fn new() -> Result { - Ok(Self { - present_slot: { - let opts = Opts::new("present_slot", "slot_at_time_of_scrape"); - IntGauge::with_opts(opts)? - }, - present_epoch: { - let opts = Opts::new("present_epoch", "epoch_at_time_of_scrape"); - IntGauge::with_opts(opts)? - }, - best_slot: { - let opts = Opts::new("best_slot", "slot_of_block_at_chain_head"); - IntGauge::with_opts(opts)? 
- }, - best_beacon_block_root: { - let opts = Opts::new("best_beacon_block_root", "root_of_block_at_chain_head"); - IntGauge::with_opts(opts)? - }, - justified_beacon_block_root: { - let opts = Opts::new( - "justified_beacon_block_root", - "root_of_block_at_justified_head", - ); - IntGauge::with_opts(opts)? - }, - finalized_beacon_block_root: { - let opts = Opts::new( - "finalized_beacon_block_root", - "root_of_block_at_finalized_head", - ); - IntGauge::with_opts(opts)? - }, - validator_count: { - let opts = Opts::new("validator_count", "number_of_validators"); - IntGauge::with_opts(opts)? - }, - justified_epoch: { - let opts = Opts::new("justified_epoch", "state_justified_epoch"); - IntGauge::with_opts(opts)? - }, - finalized_epoch: { - let opts = Opts::new("finalized_epoch", "state_finalized_epoch"); - IntGauge::with_opts(opts)? - }, - validator_balances_sum: { - let opts = Opts::new("validator_balances_sum", "sum_of_all_validator_balances"); - IntGauge::with_opts(opts)? - }, - database_size: { - let opts = Opts::new("database_size", "size_of_on_disk_db_in_mb"); - IntGauge::with_opts(opts)? - }, - }) - } - - /// Registry this instance with the `registry`. 
- pub fn register(&self, registry: &Registry) -> Result<(), prometheus::Error> { - registry.register(Box::new(self.present_slot.clone()))?; - registry.register(Box::new(self.present_epoch.clone()))?; - registry.register(Box::new(self.best_slot.clone()))?; - registry.register(Box::new(self.best_beacon_block_root.clone()))?; - registry.register(Box::new(self.justified_beacon_block_root.clone()))?; - registry.register(Box::new(self.finalized_beacon_block_root.clone()))?; - registry.register(Box::new(self.validator_count.clone()))?; - registry.register(Box::new(self.finalized_epoch.clone()))?; - registry.register(Box::new(self.justified_epoch.clone()))?; - registry.register(Box::new(self.validator_balances_sum.clone()))?; - registry.register(Box::new(self.database_size.clone()))?; - - Ok(()) - } - - /// Update the metrics in `self` to the latest values. - pub fn update(&self, beacon_chain: &BeaconChain, db_path: &PathBuf) { - let state = &beacon_chain.head().beacon_state; - - let present_slot = beacon_chain - .slot_clock - .present_slot() - .unwrap_or_else(|_| None) - .unwrap_or_else(|| Slot::new(0)); - self.present_slot.set(present_slot.as_u64() as i64); - self.present_epoch - .set(present_slot.epoch(T::EthSpec::slots_per_epoch()).as_u64() as i64); - - self.best_slot.set(state.slot.as_u64() as i64); - self.best_beacon_block_root - .set(beacon_chain.head().beacon_block_root.to_low_u64_le() as i64); - self.justified_beacon_block_root.set( - beacon_chain - .head() - .beacon_state - .current_justified_checkpoint - .root - .to_low_u64_le() as i64, - ); - self.finalized_beacon_block_root.set( - beacon_chain - .head() - .beacon_state - .finalized_checkpoint - .root - .to_low_u64_le() as i64, - ); - self.validator_count.set(state.validators.len() as i64); - self.justified_epoch - .set(state.current_justified_checkpoint.epoch.as_u64() as i64); - self.finalized_epoch - .set(state.finalized_checkpoint.epoch.as_u64() as i64); - if SHOULD_SUM_VALIDATOR_BALANCES { - 
self.validator_balances_sum - .set(state.balances.iter().sum::() as i64); - } - let db_size = if let Ok(iter) = fs::read_dir(db_path) { - iter.filter_map(Result::ok) - .map(size_of_dir_entry) - .fold(0_u64, |sum, val| sum + val) - } else { - 0 - }; - self.database_size.set(db_size as i64); - } -} - -fn size_of_dir_entry(dir: fs::DirEntry) -> u64 { - dir.metadata().map(|m| m.len()).unwrap_or(0) -} diff --git a/beacon_node/rest_api/src/config.rs b/beacon_node/rest_api/src/config.rs index c4a9c738a..90ac0821b 100644 --- a/beacon_node/rest_api/src/config.rs +++ b/beacon_node/rest_api/src/config.rs @@ -18,7 +18,7 @@ impl Default for Config { Config { enabled: true, // rest_api enabled by default listen_address: Ipv4Addr::new(127, 0, 0, 1), - port: 1248, + port: 5052, } } } diff --git a/docs/config_examples/beacon-node.toml b/docs/config_examples/beacon-node.toml index 3c9f8b613..f0863934e 100644 --- a/docs/config_examples/beacon-node.toml +++ b/docs/config_examples/beacon-node.toml @@ -78,14 +78,6 @@ enabled = false listen_address = "127.0.0.1" port = 5051 -# -# Legacy HTTP server configuration. To be removed. -# -[http] -enabled = false -listen_address = "127.0.0.1" -listen_port = "5052" - # # RESTful HTTP API server configuration. # @@ -95,4 +87,4 @@ enabled = true # The listen port for the HTTP server. listen_address = "127.0.0.1" # The listen port for the HTTP server. 
-port = 1248 +port = 5052 From 95a320817e0c724b6d4ed64b9bf2fefacc918aa6 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 15:40:51 +1000 Subject: [PATCH 17/26] Update metrics names to be more like standard --- beacon_node/beacon_chain/src/metrics.rs | 65 +++++++++++++------------ beacon_node/rest_api/src/metrics.rs | 6 +-- beacon_node/store/src/metrics.rs | 12 ++--- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 227f1090f..00a3e5eb2 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -7,45 +7,45 @@ lazy_static! { * Block Processing */ pub static ref BLOCK_PROCESSING_REQUESTS: Result = try_create_int_counter( - "beacon_block_processing_requests", + "beacon_block_processing_requests_total", "Count of blocks submitted for processing" ); pub static ref BLOCK_PROCESSING_SUCCESSES: Result = try_create_int_counter( - "beacon_block_processing_successes", + "beacon_block_processing_successes_total", "Count of blocks processed without error" ); pub static ref BLOCK_PROCESSING_TIMES: Result = - try_create_histogram("block_processing_times", "Full runtime of block processing"); + try_create_histogram("block_processing_seconds", "Full runtime of block processing"); pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( - "beacon_block_processing_db_read_times", + "beacon_block_processing_db_read_seconds", "Time spent loading block and state from DB for block processing" ); pub static ref BLOCK_PROCESSING_CATCHUP_STATE: Result = try_create_histogram( - "beacon_block_processing_catch_up_state_times", + "beacon_block_processing_catch_up_state_seconds", "Time spent skipping slots on a state before processing a block." 
); pub static ref BLOCK_PROCESSING_COMMITTEE: Result = try_create_histogram( - "beacon_block_processing_committee_building_times", + "beacon_block_processing_committee_building_seconds", "Time spent building/obtaining committees for block processing." ); pub static ref BLOCK_PROCESSING_CORE: Result = try_create_histogram( - "beacon_block_processing_core_times", + "beacon_block_processing_core_seconds", "Time spent doing the core per_block_processing state processing." ); pub static ref BLOCK_PROCESSING_STATE_ROOT: Result = try_create_histogram( - "beacon_block_processing_state_root_times", + "beacon_block_processing_state_root_seconds", "Time spent calculating the state root when processing a block." ); pub static ref BLOCK_PROCESSING_DB_WRITE: Result = try_create_histogram( - "beacon_block_processing_db_write_times", + "beacon_block_processing_db_write_seconds", "Time spent writing a newly processed block and state to DB" ); pub static ref BLOCK_PROCESSING_FORK_CHOICE_REGISTER: Result = try_create_histogram( - "beacon_block_processing_fork_choice_register_times", + "beacon_block_processing_fork_choice_register_seconds", "Time spent registering the new block with fork choice (but not finding head)" ); pub static ref BLOCK_PROCESSING_FORK_CHOICE_FIND_HEAD: Result = try_create_histogram( - "beacon_block_processing_fork_choice_find_head_times", + "beacon_block_processing_fork_choice_find_head_seconds", "Time spent finding the new head after processing a new block" ); @@ -53,21 +53,21 @@ lazy_static! { * Block Production */ pub static ref BLOCK_PRODUCTION_REQUESTS: Result = try_create_int_counter( - "beacon_block_production_requests", + "beacon_block_production_requests_total", "Count of all block production requests" ); pub static ref BLOCK_PRODUCTION_SUCCESSES: Result = try_create_int_counter( - "beacon_block_production_successes", + "beacon_block_production_successes_total", "Count of blocks successfully produced." 
); pub static ref BLOCK_PRODUCTION_TIMES: Result = - try_create_histogram("beacon_block_production_times", "Full runtime of block production"); + try_create_histogram("beacon_block_production_seconds", "Full runtime of block production"); /* * Block Statistics */ pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = try_create_histogram( - "beacon_operations_per_block_attestation", + "beacon_operations_per_block_attestation_total", "Number of attestations in a block" ); @@ -75,15 +75,15 @@ lazy_static! { * Attestation Processing */ pub static ref ATTESTATION_PROCESSING_REQUESTS: Result = try_create_int_counter( - "beacon_attestation_processing_requests", + "beacon_attestation_processing_requests_total", "Count of all attestations submitted for processing" ); pub static ref ATTESTATION_PROCESSING_SUCCESSES: Result = try_create_int_counter( - "beacon_attestation_processing_successes", + "beacon_attestation_processing_successes_total", "total_attestation_processing_successes" ); pub static ref ATTESTATION_PROCESSING_TIMES: Result = try_create_histogram( - "beacon_attestation_processing_times", + "beacon_attestation_processing_seconds", "Full runtime of attestation processing" ); @@ -91,15 +91,15 @@ lazy_static! { * Attestation Production */ pub static ref ATTESTATION_PRODUCTION_REQUESTS: Result = try_create_int_counter( - "beacon_attestation_production_requests", + "beacon_attestation_production_requests_total", "Count of all attestation production requests" ); pub static ref ATTESTATION_PRODUCTION_SUCCESSES: Result = try_create_int_counter( - "beacon_attestation_production_successes", + "beacon_attestation_production_successes_total", "Count of attestations processed without error" ); pub static ref ATTESTATION_PRODUCTION_TIMES: Result = try_create_histogram( - "beacon_attestation_production_times", + "beacon_attestation_production_seconds", "Full runtime of attestation production" ); @@ -107,19 +107,19 @@ lazy_static! 
{ * Fork Choice */ pub static ref FORK_CHOICE_REQUESTS: Result = try_create_int_counter( - "beacon_fork_choice_requests", + "beacon_fork_choice_requests_total", "Count of occasions where fork choice has tried to find a head" ); pub static ref FORK_CHOICE_ERRORS: Result = try_create_int_counter( - "beacon_fork_choice_errors", + "beacon_fork_choice_errors_total", "Count of occasions where fork choice has returned an error when trying to find a head" ); pub static ref FORK_CHOICE_CHANGED_HEAD: Result = try_create_int_counter( - "beacon_fork_choice_changed_head", + "beacon_fork_choice_changed_head_total", "Count of occasions fork choice has found a new head" ); pub static ref FORK_CHOICE_REORG_COUNT: Result = try_create_int_counter( - "beacon_fork_choice_reorg_count", + "beacon_fork_choice_reorg_total", "Count of occasions fork choice has switched to a different chain" ); pub static ref FORK_CHOICE_TIMES: Result = @@ -156,7 +156,7 @@ lazy_static! { * Chain Head */ pub static ref UPDATE_HEAD_TIMES: Result = - try_create_histogram("beacon_update_head_times", "Time taken to update the canonical head"); + try_create_histogram("beacon_update_head_seconds", "Time taken to update the canonical head"); pub static ref HEAD_STATE_SLOT: Result = try_create_int_gauge("beacon_head_state_slot", "Slot of the block at the head of the chain"); pub static ref HEAD_STATE_ROOT: Result = @@ -175,16 +175,18 @@ lazy_static! 
{ try_create_int_gauge("beacon_head_state_finalized_root", "Finalized root at the head of the chain"); pub static ref HEAD_STATE_FINALIZED_EPOCH: Result = try_create_int_gauge("beacon_head_state_finalized_epoch", "Finalized epoch at the head of the chain"); + pub static ref HEAD_STATE_SHARDS: Result = + try_create_int_gauge("beacon_head_state_shard_total", "Count of shards in the beacon chain"); pub static ref HEAD_STATE_TOTAL_VALIDATORS: Result = - try_create_int_gauge("beacon_head_state_total_validators", "Count of validators at the head of the chain"); + try_create_int_gauge("beacon_head_state_total_validators_total", "Count of validators at the head of the chain"); pub static ref HEAD_STATE_ACTIVE_VALIDATORS: Result = - try_create_int_gauge("beacon_head_state_active_validators", "Count of active validators at the head of the chain"); + try_create_int_gauge("beacon_head_state_active_validators_total", "Count of active validators at the head of the chain"); pub static ref HEAD_STATE_VALIDATOR_BALANCES: Result = - try_create_int_gauge("beacon_head_state_validator_balances", "Sum of all validator balances at the head of the chain"); + try_create_int_gauge("beacon_head_state_validator_balances_total", "Sum of all validator balances at the head of the chain"); pub static ref HEAD_STATE_SLASHED_VALIDATORS: Result = - try_create_int_gauge("beacon_head_state_slashed_validators", "Count of all slashed validators at the head of the chain"); + try_create_int_gauge("beacon_head_state_slashed_validators_total", "Count of all slashed validators at the head of the chain"); pub static ref HEAD_STATE_WITHDRAWN_VALIDATORS: Result = - try_create_int_gauge("beacon_head_state_withdrawn_validators", "Sum of all validator balances at the head of the chain"); + try_create_int_gauge("beacon_head_state_withdrawn_validators_total", "Sum of all validator balances at the head of the chain"); pub static ref HEAD_STATE_ETH1_DEPOSIT_INDEX: Result = 
try_create_int_gauge("beacon_head_state_eth1_deposit_index", "Eth1 deposit index at the head of the chain"); } @@ -242,6 +244,7 @@ fn scrape_head_state(state: &BeaconState, state &HEAD_STATE_FINALIZED_EPOCH, state.finalized_checkpoint.epoch, ); + set_gauge_by_usize(&HEAD_STATE_SHARDS, state.previous_crosslinks.len()); set_gauge_by_usize(&HEAD_STATE_TOTAL_VALIDATORS, state.validators.len()); set_gauge_by_u64( &HEAD_STATE_VALIDATOR_BALANCES, diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index c0db810b6..b0f1c1b98 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -8,15 +8,15 @@ pub use lighthouse_metrics::*; lazy_static! { pub static ref REQUEST_RESPONSE_TIME: Result = try_create_histogram( - "http_server_request_response_time", + "http_server_request_duration_seconds", "Time taken to build a response to a HTTP request" ); pub static ref REQUEST_COUNT: Result = try_create_int_counter( - "http_server_request_count", + "http_server_request_total", "Total count of HTTP requests received" ); pub static ref SUCCESS_COUNT: Result = try_create_int_counter( - "http_server_success_count", + "http_server_success_total", "Total count of HTTP 200 responses sent" ); } diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs index 430e9c38e..30cbb878b 100644 --- a/beacon_node/store/src/metrics.rs +++ b/beacon_node/store/src/metrics.rs @@ -7,27 +7,27 @@ lazy_static! 
{ pub static ref DISK_DB_SIZE: Result = try_create_int_gauge("store_disk_db_size", "Size of the on-disk database (bytes)"); pub static ref DISK_DB_WRITE_BYTES: Result = try_create_int_counter( - "store_disk_db_write_bytes", + "store_disk_db_write_bytes_total", "Number of bytes attempted to be written to the on-disk DB" ); pub static ref DISK_DB_READ_BYTES: Result = try_create_int_counter( - "store_disk_db_read_bytes", + "store_disk_db_read_bytes_total", "Number of bytes read from the on-disk DB" ); pub static ref DISK_DB_READ_COUNT: Result = try_create_int_counter( - "store_disk_db_read_count", + "store_disk_db_read_count_total", "Total number of reads to the on-disk DB" ); pub static ref DISK_DB_WRITE_COUNT: Result = try_create_int_counter( - "store_disk_db_write_count", + "store_disk_db_write_count_total", "Total number of writes to the on-disk DB" ); pub static ref DISK_DB_EXISTS_COUNT: Result = try_create_int_counter( - "store_disk_db_exists_count", + "store_disk_db_exists_count_total", "Total number of checks if a key is in the on-disk DB" ); pub static ref DISK_DB_DELETE_COUNT: Result = try_create_int_counter( - "store_disk_db_delete_count", + "store_disk_db_delete_count_total", "Total number of deletions from the on-disk DB" ); } From d7c546844cfaf58ab63739a181fbf73c924fb4d5 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 17:44:47 +1000 Subject: [PATCH 18/26] Fix broken beacon chain metrics, add slot clock metrics --- beacon_node/beacon_chain/src/lib.rs | 1 + beacon_node/beacon_chain/src/metrics.rs | 28 +----------------- beacon_node/rest_api/Cargo.toml | 1 + beacon_node/rest_api/src/metrics.rs | 17 +++++++++++ eth2/utils/slot_clock/Cargo.toml | 2 ++ eth2/utils/slot_clock/src/lib.rs | 10 ++++++- eth2/utils/slot_clock/src/metrics.rs | 29 +++++++++++++++++++ .../slot_clock/src/system_time_slot_clock.rs | 4 +++ .../slot_clock/src/testing_slot_clock.rs | 4 +++ 9 files changed, 68 insertions(+), 28 deletions(-) create mode 100644 
eth2/utils/slot_clock/src/metrics.rs diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index 1262bc537..cc7725dd8 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -1,3 +1,4 @@ +#![recursion_limit = "128"] // For lazy-static #[macro_use] extern crate lazy_static; diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 00a3e5eb2..a4b36cd37 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,6 +1,6 @@ use crate::{BeaconChain, BeaconChainTypes}; pub use lighthouse_metrics::*; -use types::{BeaconState, Epoch, EthSpec, Hash256, Slot}; +use types::{BeaconState, Epoch, Hash256, Slot}; lazy_static! { /* @@ -140,17 +140,6 @@ lazy_static! { */ pub static ref PERSIST_CHAIN: Result = try_create_histogram("beacon_persist_chain", "Time taken to update the canonical head"); -} - -// Lazy-static is split so we don't reach the crate-level recursion limit. -lazy_static! { - /* - * Slot Clock - */ - pub static ref PRESENT_SLOT: Result = - try_create_int_gauge("beacon_present_slot", "The present slot, according to system time"); - pub static ref PRESENT_EPOCH: Result = - try_create_int_gauge("beacon_present_epoch", "The present epoch, according to system time"); /* * Chain Head @@ -194,21 +183,6 @@ lazy_static! { /// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot, /// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`. 
pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { - set_gauge_by_slot( - &PRESENT_SLOT, - beacon_chain - .read_slot_clock() - .unwrap_or_else(|| Slot::new(0)), - ); - - set_gauge_by_epoch( - &PRESENT_EPOCH, - beacon_chain - .read_slot_clock() - .map(|s| s.epoch(T::EthSpec::slots_per_epoch())) - .unwrap_or_else(|| Epoch::new(0)), - ); - scrape_head_state::( &beacon_chain.head().beacon_state, beacon_chain.head().beacon_state_root, diff --git a/beacon_node/rest_api/Cargo.toml b/beacon_node/rest_api/Cargo.toml index 100e680de..c7026014c 100644 --- a/beacon_node/rest_api/Cargo.toml +++ b/beacon_node/rest_api/Cargo.toml @@ -26,3 +26,4 @@ tokio = "0.1.17" url = "2.0" lazy_static = "1.3.0" lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } +slot_clock = { path = "../../eth2/utils/slot_clock" } diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index b0f1c1b98..f0ccef5f8 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -39,6 +39,23 @@ pub fn get_prometheus(req: Request) -> ApiR .get::() .ok_or_else(|| ApiError::ServerError("DBPath extension missing".to_string()))?; + // There are two categories of metrics: + // + // - Dynamically updated: things like histograms and event counters that are updated on the + // fly. + // - Statically updated: things which are only updated at the time of the scrape (used where we + // can avoid cluttering up code with metrics calls). + // + // The `prometheus` crate has a `DEFAULT_REGISTRY` global singleton (via `lazy_static`) which + // keeps the state of all the metrics. Dynamically updated things will already be up-to-date in + // the registry (because they update themselves) however statically updated things need to be + // "scraped". + // + // We proceed by, first updating all the static metrics using `scrape_for_metrics(..)`. 
Then, + // using `prometheus::gather(..)` to collect the global `DEFAULT_REGISTRY` metrics into a + // string that can be returned via HTTP. + + slot_clock::scrape_for_metrics::(&beacon_chain.slot_clock); store::scrape_for_metrics(&db_path); beacon_chain::scrape_for_metrics(&beacon_chain); diff --git a/eth2/utils/slot_clock/Cargo.toml b/eth2/utils/slot_clock/Cargo.toml index 31a435725..c4b9df5ed 100644 --- a/eth2/utils/slot_clock/Cargo.toml +++ b/eth2/utils/slot_clock/Cargo.toml @@ -6,3 +6,5 @@ edition = "2018" [dependencies] types = { path = "../../types" } +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../lighthouse_metrics" } diff --git a/eth2/utils/slot_clock/src/lib.rs b/eth2/utils/slot_clock/src/lib.rs index 7b86684fa..871743c9e 100644 --- a/eth2/utils/slot_clock/src/lib.rs +++ b/eth2/utils/slot_clock/src/lib.rs @@ -1,9 +1,15 @@ +#[macro_use] +extern crate lazy_static; + +mod metrics; mod system_time_slot_clock; mod testing_slot_clock; +use std::time::Duration; + pub use crate::system_time_slot_clock::{Error as SystemTimeSlotClockError, SystemTimeSlotClock}; pub use crate::testing_slot_clock::{Error as TestingSlotClockError, TestingSlotClock}; -use std::time::Duration; +pub use metrics::scrape_for_metrics; pub use types::Slot; pub trait SlotClock: Send + Sync + Sized { @@ -17,4 +23,6 @@ pub trait SlotClock: Send + Sync + Sized { fn present_slot(&self) -> Result, Self::Error>; fn duration_to_next_slot(&self) -> Result, Self::Error>; + + fn slot_duration_millis(&self) -> u64; } diff --git a/eth2/utils/slot_clock/src/metrics.rs b/eth2/utils/slot_clock/src/metrics.rs new file mode 100644 index 000000000..a9153a10c --- /dev/null +++ b/eth2/utils/slot_clock/src/metrics.rs @@ -0,0 +1,29 @@ +use crate::SlotClock; +pub use lighthouse_metrics::*; +use types::{EthSpec, Slot}; + +lazy_static! 
{ + pub static ref PRESENT_SLOT: Result = + try_create_int_gauge("slotclock_present_slot", "The present wall-clock slot"); + pub static ref PRESENT_EPOCH: Result = + try_create_int_gauge("slotclock_present_epoch", "The present wall-clock epoch"); + pub static ref MILLISECONDS_PER_SLOT: Result = try_create_int_gauge( + "slotclock_slot_time_milliseconds", + "The duration in milliseconds between each slot" + ); +} + +/// Update the global metrics `DEFAULT_REGISTRY` with info from the slot clock. +pub fn scrape_for_metrics(clock: &U) { + let present_slot = match clock.present_slot() { + Ok(Some(slot)) => slot, + _ => Slot::new(0), + }; + + set_gauge(&PRESENT_SLOT, present_slot.as_u64() as i64); + set_gauge( + &PRESENT_EPOCH, + present_slot.epoch(T::slots_per_epoch()).as_u64() as i64, + ); + set_gauge(&MILLISECONDS_PER_SLOT, clock.slot_duration_millis() as i64); +} diff --git a/eth2/utils/slot_clock/src/system_time_slot_clock.rs b/eth2/utils/slot_clock/src/system_time_slot_clock.rs index 7c184b02b..c493a8be8 100644 --- a/eth2/utils/slot_clock/src/system_time_slot_clock.rs +++ b/eth2/utils/slot_clock/src/system_time_slot_clock.rs @@ -52,6 +52,10 @@ impl SlotClock for SystemTimeSlotClock { fn duration_to_next_slot(&self) -> Result, Error> { duration_to_next_slot(self.genesis_seconds, self.slot_duration_seconds) } + + fn slot_duration_millis(&self) -> u64 { + self.slot_duration_seconds * 1000 + } } impl From for Error { diff --git a/eth2/utils/slot_clock/src/testing_slot_clock.rs b/eth2/utils/slot_clock/src/testing_slot_clock.rs index ab00d2baa..f741d3b87 100644 --- a/eth2/utils/slot_clock/src/testing_slot_clock.rs +++ b/eth2/utils/slot_clock/src/testing_slot_clock.rs @@ -40,6 +40,10 @@ impl SlotClock for TestingSlotClock { fn duration_to_next_slot(&self) -> Result, Error> { Ok(Some(Duration::from_secs(1))) } + + fn slot_duration_millis(&self) -> u64 { + 0 + } } #[cfg(test)] From 7165598b7fe3346ece3420bf808d14391106295a Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 
12 Aug 2019 18:19:50 +1000 Subject: [PATCH 19/26] Add lighthouse_metrics gather fn --- beacon_node/rest_api/src/metrics.rs | 16 +++++++++------- eth2/utils/lighthouse_metrics/src/lib.rs | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs index f0ccef5f8..064359337 100644 --- a/beacon_node/rest_api/src/metrics.rs +++ b/beacon_node/rest_api/src/metrics.rs @@ -46,20 +46,22 @@ pub fn get_prometheus(req: Request) -> ApiR // - Statically updated: things which are only updated at the time of the scrape (used where we // can avoid cluttering up code with metrics calls). // - // The `prometheus` crate has a `DEFAULT_REGISTRY` global singleton (via `lazy_static`) which - // keeps the state of all the metrics. Dynamically updated things will already be up-to-date in - // the registry (because they update themselves) however statically updated things need to be - // "scraped". + // The `lighthouse_metrics` crate has a `DEFAULT_REGISTRY` global singleton (via `lazy_static`) + // which keeps the state of all the metrics. Dynamically updated things will already be + // up-to-date in the registry (because they update themselves) however statically updated + // things need to be "scraped". // // We proceed by, first updating all the static metrics using `scrape_for_metrics(..)`. Then, - // using `prometheus::gather(..)` to collect the global `DEFAULT_REGISTRY` metrics into a - // string that can be returned via HTTP. + // using `lighthouse_metrics::gather(..)` to collect the global `DEFAULT_REGISTRY` metrics into + // a string that can be returned via HTTP. 
slot_clock::scrape_for_metrics::(&beacon_chain.slot_clock); store::scrape_for_metrics(&db_path); beacon_chain::scrape_for_metrics(&beacon_chain); - encoder.encode(&prometheus::gather(), &mut buffer).unwrap(); + encoder + .encode(&lighthouse_metrics::gather(), &mut buffer) + .unwrap(); String::from_utf8(buffer) .map(|string| success_response(Body::from(string))) diff --git a/eth2/utils/lighthouse_metrics/src/lib.rs b/eth2/utils/lighthouse_metrics/src/lib.rs index a8656d017..c9e66e971 100644 --- a/eth2/utils/lighthouse_metrics/src/lib.rs +++ b/eth2/utils/lighthouse_metrics/src/lib.rs @@ -2,6 +2,10 @@ use prometheus::{HistogramOpts, HistogramTimer, Opts}; pub use prometheus::{Histogram, IntCounter, IntGauge, Result}; +pub fn gather() -> Vec { + prometheus::gather() +} + pub fn try_create_int_counter(name: &str, help: &str) -> Result { let opts = Opts::new(name, help); let counter = IntCounter::with_opts(opts)?; From d5d60874e5e38368e8e538f5d4f4a89b30c7423a Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Mon, 12 Aug 2019 18:20:05 +1000 Subject: [PATCH 20/26] Remove http args --- beacon_node/src/main.rs | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/beacon_node/src/main.rs b/beacon_node/src/main.rs index 2e3ad0691..9a52f2638 100644 --- a/beacon_node/src/main.rs +++ b/beacon_node/src/main.rs @@ -128,28 +128,6 @@ fn main() { .help("Listen port for RPC endpoint.") .takes_value(true), ) - /* - * HTTP server parameters. 
- */ - .arg( - Arg::with_name("http") - .long("http") - .help("Enable the HTTP server.") - .takes_value(false), - ) - .arg( - Arg::with_name("http-address") - .long("http-address") - .value_name("Address") - .help("Listen address for the HTTP server.") - .takes_value(true), - ) - .arg( - Arg::with_name("http-port") - .long("http-port") - .help("Listen port for the HTTP server.") - .takes_value(true), - ) /* Client related arguments */ .arg( Arg::with_name("api") From 24b2f83713f5e3fd5147e99be44a5f842a6332fb Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 13 Aug 2019 07:35:52 +1000 Subject: [PATCH 21/26] Fix wrong state given to op pool prune --- beacon_node/beacon_chain/src/beacon_chain.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 0e0583309..bed50202d 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -1199,8 +1199,12 @@ impl BeaconChain { self.fork_choice .process_finalization(&finalized_block, finalized_block_root)?; - self.op_pool - .prune_all(&self.head().beacon_state, &self.spec); + let finalized_state = self + .store + .get::>(&finalized_block.state_root)? + .ok_or_else(|| Error::MissingBeaconState(finalized_block.state_root))?; + + self.op_pool.prune_all(&finalized_state, &self.spec); Ok(()) } From e369e293a507c602be1901b462001bfd8e4e825c Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 13 Aug 2019 09:20:39 +1000 Subject: [PATCH 22/26] Make prom metric names more consistent --- beacon_node/beacon_chain/src/metrics.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index a4b36cd37..574fbb4a4 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -15,7 +15,7 @@ lazy_static! 
{ "Count of blocks processed without error" ); pub static ref BLOCK_PROCESSING_TIMES: Result = - try_create_histogram("block_processing_seconds", "Full runtime of block processing"); + try_create_histogram("beacon_block_processing_seconds", "Full runtime of block processing"); pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( "beacon_block_processing_db_read_seconds", "Time spent loading block and state from DB for block processing" @@ -123,15 +123,15 @@ lazy_static! { "Count of occasions fork choice has switched to a different chain" ); pub static ref FORK_CHOICE_TIMES: Result = - try_create_histogram("beacon_fork_choice_time", "Full runtime of fork choice"); + try_create_histogram("beacon_fork_choice_seconds", "Full runtime of fork choice"); pub static ref FORK_CHOICE_FIND_HEAD_TIMES: Result = - try_create_histogram("beacon_fork_choice_find_head_time", "Full runtime of fork choice find_head function"); + try_create_histogram("beacon_fork_choice_find_head_seconds", "Full runtime of fork choice find_head function"); pub static ref FORK_CHOICE_PROCESS_BLOCK_TIMES: Result = try_create_histogram( - "beacon_fork_choice_process_block_time", + "beacon_fork_choice_process_block_seconds", "Time taken to add a block and all attestations to fork choice" ); pub static ref FORK_CHOICE_PROCESS_ATTESTATION_TIMES: Result = try_create_histogram( - "beacon_fork_choice_process_attestation_time", + "beacon_fork_choice_process_attestation_seconds", "Time taken to add an attestation to fork choice" ); From b076b07022c9f359315b52700de301d23530e1f0 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 13 Aug 2019 12:11:18 +1000 Subject: [PATCH 23/26] Add more metrics, tidy existing metrics --- beacon_node/beacon_chain/src/beacon_chain.rs | 4 ++ beacon_node/beacon_chain/src/metrics.rs | 4 ++ beacon_node/eth2-libp2p/src/discovery.rs | 8 ++- beacon_node/eth2-libp2p/src/metrics.rs | 14 +++-- beacon_node/store/src/impls.rs | 20 ++++++- 
beacon_node/store/src/impls/beacon_state.rs | 21 +++++++- beacon_node/store/src/metrics.rs | 57 ++++++++++++++++++++ eth2/utils/slot_clock/src/metrics.rs | 3 ++ 8 files changed, 120 insertions(+), 11 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 0e0583309..faffa46f5 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -827,8 +827,12 @@ impl BeaconChain { return Ok(BlockProcessingOutcome::GenesisBlock); } + let block_root_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_BLOCK_ROOT); + let block_root = block.canonical_root(); + metrics::stop_timer(block_root_timer); + if block_root == self.genesis_block_root { return Ok(BlockProcessingOutcome::GenesisBlock); } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 574fbb4a4..db213a0cf 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -16,6 +16,10 @@ lazy_static! { ); pub static ref BLOCK_PROCESSING_TIMES: Result = try_create_histogram("beacon_block_processing_seconds", "Full runtime of block processing"); + pub static ref BLOCK_PROCESSING_BLOCK_ROOT: Result = try_create_histogram( + "beacon_block_processing_block_root_seconds", + "Time spent calculating the block root when processing a block." 
+ ); pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( "beacon_block_processing_db_read_seconds", "Time spent loading block and state from DB for block processing" diff --git a/beacon_node/eth2-libp2p/src/discovery.rs b/beacon_node/eth2-libp2p/src/discovery.rs index d9f2f7465..ca98db324 100644 --- a/beacon_node/eth2-libp2p/src/discovery.rs +++ b/beacon_node/eth2-libp2p/src/discovery.rs @@ -159,13 +159,17 @@ where } fn inject_connected(&mut self, peer_id: PeerId, _endpoint: ConnectedPoint) { - metrics::inc_counter(&metrics::PEER_CONNECT_COUNT); self.connected_peers.insert(peer_id); + + metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT); + metrics::set_gauge(&metrics::PEERS_CONNECTED, self.connected_peers() as i64); } fn inject_disconnected(&mut self, peer_id: &PeerId, _endpoint: ConnectedPoint) { - metrics::inc_counter(&metrics::PEER_DISCONNECT_COUNT); self.connected_peers.remove(peer_id); + + metrics::inc_counter(&metrics::PEER_DISCONNECT_EVENT_COUNT); + metrics::set_gauge(&metrics::PEERS_CONNECTED, self.connected_peers() as i64); } fn inject_replaced( diff --git a/beacon_node/eth2-libp2p/src/metrics.rs b/beacon_node/eth2-libp2p/src/metrics.rs index a47037669..b678ef6b4 100644 --- a/beacon_node/eth2-libp2p/src/metrics.rs +++ b/beacon_node/eth2-libp2p/src/metrics.rs @@ -2,15 +2,19 @@ pub use lighthouse_metrics::*; lazy_static! 
{ pub static ref ADDRESS_UPDATE_COUNT: Result = try_create_int_counter( - "libp2p_address_update_count", + "libp2p_address_update_total", "Count of libp2p socked updated events (when our view of our IP address has changed)" ); - pub static ref PEER_CONNECT_COUNT: Result = try_create_int_counter( - "libp2p_peer_connect_count", + pub static ref PEERS_CONNECTED: Result = try_create_int_gauge( + "libp2p_peer_connected_peers_total", + "Count of libp2p peers currently connected" + ); + pub static ref PEER_CONNECT_EVENT_COUNT: Result = try_create_int_counter( + "libp2p_peer_connect_event_total", "Count of libp2p peer connect events (not the current number of connected peers)" ); - pub static ref PEER_DISCONNECT_COUNT: Result = try_create_int_counter( - "libp2p_peer_disconnect_count", + pub static ref PEER_DISCONNECT_EVENT_COUNT: Result = try_create_int_counter( + "libp2p_peer_disconnect_event_total", "Count of libp2p peer disconnect events" ); } diff --git a/beacon_node/store/src/impls.rs b/beacon_node/store/src/impls.rs index e88b70f39..1c29c245b 100644 --- a/beacon_node/store/src/impls.rs +++ b/beacon_node/store/src/impls.rs @@ -9,10 +9,26 @@ impl StoreItem for BeaconBlock { } fn as_store_bytes(&self) -> Vec { - self.as_ssz_bytes() + let timer = metrics::start_timer(&metrics::BEACON_STATE_WRITE_TIMES); + let bytes = self.as_ssz_bytes(); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_STATE_WRITE_COUNT); + metrics::inc_counter_by(&metrics::BEACON_STATE_WRITE_BYTES, bytes.len() as i64); + + bytes } fn from_store_bytes(bytes: &mut [u8]) -> Result { - Self::from_ssz_bytes(bytes).map_err(Into::into) + let timer = metrics::start_timer(&metrics::BEACON_STATE_READ_TIMES); + + let len = bytes.len(); + let result = Self::from_ssz_bytes(bytes).map_err(Into::into); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_STATE_READ_COUNT); + metrics::inc_counter_by(&metrics::BEACON_STATE_READ_BYTES, len as i64); + + result } } diff --git 
a/beacon_node/store/src/impls/beacon_state.rs b/beacon_node/store/src/impls/beacon_state.rs index 591663fe0..69e83cd63 100644 --- a/beacon_node/store/src/impls/beacon_state.rs +++ b/beacon_node/store/src/impls/beacon_state.rs @@ -53,12 +53,29 @@ impl StoreItem for BeaconState { } fn as_store_bytes(&self) -> Vec { + let timer = metrics::start_timer(&metrics::BEACON_STATE_WRITE_TIMES); + let container = StorageContainer::new(self); - container.as_ssz_bytes() + let bytes = container.as_ssz_bytes(); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_STATE_WRITE_COUNT); + metrics::inc_counter_by(&metrics::BEACON_STATE_WRITE_BYTES, bytes.len() as i64); + + bytes } fn from_store_bytes(bytes: &mut [u8]) -> Result { + let timer = metrics::start_timer(&metrics::BEACON_STATE_READ_TIMES); + + let len = bytes.len(); let container = StorageContainer::from_ssz_bytes(bytes)?; - container.try_into() + let result = container.try_into(); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_STATE_READ_COUNT); + metrics::inc_counter_by(&metrics::BEACON_STATE_READ_BYTES, len as i64); + + result } } diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs index 30cbb878b..90237824d 100644 --- a/beacon_node/store/src/metrics.rs +++ b/beacon_node/store/src/metrics.rs @@ -4,6 +4,9 @@ use std::fs; use std::path::PathBuf; lazy_static! { + /* + * General + */ pub static ref DISK_DB_SIZE: Result = try_create_int_gauge("store_disk_db_size", "Size of the on-disk database (bytes)"); pub static ref DISK_DB_WRITE_BYTES: Result = try_create_int_counter( @@ -30,6 +33,60 @@ lazy_static! 
{ "store_disk_db_delete_count_total", "Total number of deletions from the on-disk DB" ); + /* + * Beacon State + */ + pub static ref BEACON_STATE_READ_TIMES: Result = try_create_histogram( + "store_beacon_state_read_overhead_seconds", + "Overhead on reading a beacon state from the DB (e.g., decoding)" + ); + pub static ref BEACON_STATE_READ_COUNT: Result = try_create_int_counter( + "store_beacon_state_read_total", + "Total number of beacon state reads from the DB" + ); + pub static ref BEACON_STATE_READ_BYTES: Result = try_create_int_counter( + "store_beacon_state_read_bytes_total", + "Total number of beacon state bytes read from the DB" + ); + pub static ref BEACON_STATE_WRITE_TIMES: Result = try_create_histogram( + "store_beacon_state_write_overhead_seconds", + "Overhead on writing a beacon state to the DB (e.g., encoding)" + ); + pub static ref BEACON_STATE_WRITE_COUNT: Result = try_create_int_counter( + "store_beacon_state_write_total", + "Total number of beacon state writes the DB" + ); + pub static ref BEACON_STATE_WRITE_BYTES: Result = try_create_int_counter( + "store_beacon_state_write_bytes_total", + "Total number of beacon state bytes written to the DB" + ); + /* + * Beacon Block + */ + pub static ref BEACON_BLOCK_READ_TIMES: Result = try_create_histogram( + "store_beacon_block_read_overhead_seconds", + "Overhead on reading a beacon block from the DB (e.g., decoding)" + ); + pub static ref BEACON_BLOCK_READ_COUNT: Result = try_create_int_counter( + "store_beacon_block_read_total", + "Total number of beacon block reads from the DB" + ); + pub static ref BEACON_BLOCK_READ_BYTES: Result = try_create_int_counter( + "store_beacon_block_read_bytes_total", + "Total number of beacon block bytes read from the DB" + ); + pub static ref BEACON_BLOCK_WRITE_TIMES: Result = try_create_histogram( + "store_beacon_block_write_overhead_seconds", + "Overhead on writing a beacon block to the DB (e.g., encoding)" + ); + pub static ref BEACON_BLOCK_WRITE_COUNT: Result = 
try_create_int_counter( + "store_beacon_block_write_total", + "Total number of beacon block writes the DB" + ); + pub static ref BEACON_BLOCK_WRITE_BYTES: Result = try_create_int_counter( + "store_beacon_block_write_bytes_total", + "Total number of beacon block bytes written to the DB" + ); } /// Updates the global metrics registry with store-related information. diff --git a/eth2/utils/slot_clock/src/metrics.rs b/eth2/utils/slot_clock/src/metrics.rs index a9153a10c..e0d3923e0 100644 --- a/eth2/utils/slot_clock/src/metrics.rs +++ b/eth2/utils/slot_clock/src/metrics.rs @@ -7,6 +7,8 @@ lazy_static! { try_create_int_gauge("slotclock_present_slot", "The present wall-clock slot"); pub static ref PRESENT_EPOCH: Result = try_create_int_gauge("slotclock_present_epoch", "The present wall-clock epoch"); + pub static ref SLOTS_PER_EPOCH: Result = + try_create_int_gauge("slotclock_slots_per_epoch", "Slots per epoch (constant)"); pub static ref MILLISECONDS_PER_SLOT: Result = try_create_int_gauge( "slotclock_slot_time_milliseconds", "The duration in milliseconds between each slot" @@ -25,5 +27,6 @@ pub fn scrape_for_metrics(clock: &U) { &PRESENT_EPOCH, present_slot.epoch(T::slots_per_epoch()).as_u64() as i64, ); + set_gauge(&SLOTS_PER_EPOCH, T::slots_per_epoch() as i64); set_gauge(&MILLISECONDS_PER_SLOT, clock.slot_duration_millis() as i64); } From a3e464078af39e10132bac3d1ac37dbebae8b41a Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 13 Aug 2019 13:00:01 +1000 Subject: [PATCH 24/26] Fix store block read metrics --- beacon_node/store/src/impls.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/beacon_node/store/src/impls.rs b/beacon_node/store/src/impls.rs index 1c29c245b..ed724480c 100644 --- a/beacon_node/store/src/impls.rs +++ b/beacon_node/store/src/impls.rs @@ -9,25 +9,25 @@ impl StoreItem for BeaconBlock { } fn as_store_bytes(&self) -> Vec { - let timer = metrics::start_timer(&metrics::BEACON_STATE_WRITE_TIMES); + let timer = 
metrics::start_timer(&metrics::BEACON_BLOCK_WRITE_TIMES); let bytes = self.as_ssz_bytes(); metrics::stop_timer(timer); - metrics::inc_counter(&metrics::BEACON_STATE_WRITE_COUNT); - metrics::inc_counter_by(&metrics::BEACON_STATE_WRITE_BYTES, bytes.len() as i64); + metrics::inc_counter(&metrics::BEACON_BLOCK_WRITE_COUNT); + metrics::inc_counter_by(&metrics::BEACON_BLOCK_WRITE_BYTES, bytes.len() as i64); bytes } fn from_store_bytes(bytes: &mut [u8]) -> Result { - let timer = metrics::start_timer(&metrics::BEACON_STATE_READ_TIMES); + let timer = metrics::start_timer(&metrics::BEACON_BLOCK_READ_TIMES); let len = bytes.len(); let result = Self::from_ssz_bytes(bytes).map_err(Into::into); metrics::stop_timer(timer); - metrics::inc_counter(&metrics::BEACON_STATE_READ_COUNT); - metrics::inc_counter_by(&metrics::BEACON_STATE_READ_BYTES, len as i64); + metrics::inc_counter(&metrics::BEACON_BLOCK_READ_COUNT); + metrics::inc_counter_by(&metrics::BEACON_BLOCK_READ_BYTES, len as i64); result } From 341a83b9e8d5f3733b09ac9dae2e8aa6d5602ef5 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 13 Aug 2019 16:17:11 +1000 Subject: [PATCH 25/26] Tidy attestation metrics --- beacon_node/beacon_chain/src/beacon_chain.rs | 28 +++++++++++++++----- beacon_node/beacon_chain/src/metrics.rs | 4 +++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index faffa46f5..0cb6d5f98 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -547,11 +547,14 @@ impl BeaconChain { &self, attestation: Attestation, ) -> Result { + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_REQUESTS); + let timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_TIMES); + // From the store, load the attestation's "head block". // // An honest validator would have set this block to be the head of the chain (i.e., the // result of running fork choice). 
- if let Some(attestation_head_block) = self + let result = if let Some(attestation_head_block) = self .store .get::>(&attestation.data.beacon_block_root)? { @@ -680,7 +683,15 @@ impl BeaconChain { Ok(AttestationProcessingOutcome::UnknownHeadBlock { beacon_block_root: attestation.data.beacon_block_root, }) + }; + + metrics::stop_timer(timer); + + if let Ok(AttestationProcessingOutcome::Processed) = &result { + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_SUCCESSES); } + + result } /// Verifies the `attestation` against the `state` to which it is attesting. @@ -707,9 +718,6 @@ impl BeaconChain { state: &BeaconState, block: &BeaconBlock, ) -> Result { - metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_REQUESTS); - let timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_TIMES); - // Find the highest between: // // - The highest valid finalized epoch we've ever seen (i.e., the head). @@ -719,6 +727,16 @@ impl BeaconChain { state.finalized_checkpoint.epoch, ); + // A helper function to allow attestation processing to be metered. + let verify_attestation_for_state = |state, attestation, spec, verify_signatures| { + let timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_CORE); + + let result = verify_attestation_for_state(state, attestation, spec, verify_signatures); + + metrics::stop_timer(timer); + result + }; + let result = if block.slot <= finalized_epoch.start_slot(T::EthSpec::slots_per_epoch()) { // Ignore any attestation where the slot of `data.beacon_block_root` is equal to or // prior to the finalized epoch. @@ -758,8 +776,6 @@ impl BeaconChain { Ok(AttestationProcessingOutcome::Processed) }; - timer.map(|t| t.observe_duration()); - result } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index db213a0cf..6efa4b3f2 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -90,6 +90,10 @@ lazy_static! 
{ "beacon_attestation_processing_seconds", "Full runtime of attestation processing" ); + pub static ref ATTESTATION_PROCESSING_CORE: Result = try_create_histogram( + "beacon_attestation_processing_core_seconds", + "Time spent on the core spec processing of attestation processing" + ); /* * Attestation Production From b7e43b56f9dc4167414c61d6b52238782e0caf47 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 13 Aug 2019 19:37:14 +1000 Subject: [PATCH 26/26] Fix minor PR comments --- beacon_node/beacon_chain/src/beacon_chain.rs | 6 +++--- eth2/lmd_ghost/src/reduced_tree.rs | 6 +----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 73ebb7007..76442fb8d 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -536,7 +536,7 @@ impl BeaconChain { /// If valid, the attestation is added to `self.op_pool` and `self.fork_choice`. /// /// Returns an `Ok(AttestationProcessingOutcome)` if the chain was able to make a determination - /// about the `attestation` (wether it was invalid or not). Returns an `Err` if the was an + /// about the `attestation` (whether it was invalid or not). Returns an `Err` if there was an /// error during this process and no determination was able to be made. /// /// ## Notes @@ -620,7 +620,7 @@ impl BeaconChain { outcome } else { // Use the `data.beacon_block_root` to load the state from the latest non-skipped - // slot preceding the attestations creation. + // slot preceding the attestation's creation. // // This state is guaranteed to be in the same chain as the attestation, but it's // not guaranteed to be from the same slot or epoch as the attestation. @@ -703,7 +703,7 @@ impl BeaconChain { /// The given `state` must fulfil one of the following conditions: /// /// - `state` corresponds to the `block.state_root` identified by - /// `attestation.data.beacon_block_root`. 
(Viz., `attestation` was created using `state`. + /// `attestation.data.beacon_block_root`. (Viz., `attestation` was created using `state`). /// - `state.slot` is in the same epoch as `data.target.epoch` and /// `attestation.data.beacon_block_root` is in the history of `state`. /// diff --git a/eth2/lmd_ghost/src/reduced_tree.rs b/eth2/lmd_ghost/src/reduced_tree.rs index 822c388f6..deda02e1f 100644 --- a/eth2/lmd_ghost/src/reduced_tree.rs +++ b/eth2/lmd_ghost/src/reduced_tree.rs @@ -777,11 +777,7 @@ where } pub fn get_ref(&self, i: usize) -> Option<&T> { - if i < self.0.len() { - Some(&self.0[i]) - } else { - None - } + self.0.get(i) } pub fn insert(&mut self, i: usize, element: T) {