diff --git a/Cargo.toml b/Cargo.toml index f5ee02a17..f087539e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "eth2/utils/eth2_interop_keypairs", "eth2/utils/logging", "eth2/utils/eth2_hashing", + "eth2/utils/lighthouse_metrics", "eth2/utils/merkle_proof", "eth2/utils/int_to_bytes", "eth2/utils/serde_hex", @@ -25,7 +26,6 @@ members = [ "beacon_node", "beacon_node/store", "beacon_node/client", - "beacon_node/http_server", "beacon_node/rest_api", "beacon_node/network", "beacon_node/eth2-libp2p", diff --git a/beacon_node/beacon_chain/Cargo.toml b/beacon_node/beacon_chain/Cargo.toml index 778224a3d..1d3fc03b8 100644 --- a/beacon_node/beacon_chain/Cargo.toml +++ b/beacon_node/beacon_chain/Cargo.toml @@ -7,7 +7,8 @@ edition = "2018" [dependencies] store = { path = "../store" } parking_lot = "0.7" -prometheus = "^0.6" +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } log = "0.4" operation_pool = { path = "../../eth2/operation_pool" } serde = "1.0" diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 96d306530..891f76d37 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -2,7 +2,7 @@ use crate::checkpoint::CheckPoint; use crate::errors::{BeaconChainError as Error, BlockProductionError}; use crate::fork_choice::{Error as ForkChoiceError, ForkChoice}; use crate::iter::{ReverseBlockRootIterator, ReverseStateRootIterator}; -use crate::metrics::Metrics; +use crate::metrics; use crate::persisted_beacon_chain::{PersistedBeaconChain, BEACON_CHAIN_DB_KEY}; use lmd_ghost::LmdGhost; use log::trace; @@ -106,8 +106,6 @@ pub struct BeaconChain { /// A state-machine that is updated with information from the network and chooses a canonical /// head block. pub fork_choice: ForkChoice, - /// Stores metrics about this `BeaconChain`. - pub metrics: Metrics, /// Logging to CLI, etc. log: Logger, } @@ -157,7 +155,6 @@ impl BeaconChain { canonical_head, genesis_block_root, fork_choice: ForkChoice::new(store.clone(), &genesis_block, genesis_block_root), - metrics: Metrics::new()?, store, log, }) @@ -195,7 +192,6 @@ impl BeaconChain { canonical_head: RwLock::new(p.canonical_head), state: RwLock::new(p.state), genesis_block_root: p.genesis_block_root, - metrics: Metrics::new()?, store, log, })) @@ -203,6 +199,8 @@ impl BeaconChain { /// Attempt to save this instance to `self.store`. pub fn persist(&self) -> Result<(), Error> { + let timer = metrics::start_timer(&metrics::PERSIST_CHAIN); + let p: PersistedBeaconChain = PersistedBeaconChain { canonical_head: self.canonical_head.read().clone(), op_pool: PersistedOperationPool::from_operation_pool(&self.op_pool), @@ -213,6 +211,8 @@ impl BeaconChain { let key = Hash256::from_slice(&BEACON_CHAIN_DB_KEY.as_bytes()); self.store.put(&key, &p)?; + metrics::stop_timer(timer); + Ok(()) } @@ -472,8 +472,8 @@ impl BeaconChain { state: &BeaconState, ) -> Result { // Collect some metrics. - self.metrics.attestation_production_requests.inc(); - let timer = self.metrics.attestation_production_times.start_timer(); + metrics::inc_counter(&metrics::ATTESTATION_PRODUCTION_REQUESTS); + let timer = metrics::start_timer(&metrics::ATTESTATION_PRODUCTION_TIMES); let slots_per_epoch = T::EthSpec::slots_per_epoch(); let current_epoch_start_slot = state.current_epoch().start_slot(slots_per_epoch); @@ -520,8 +520,8 @@ impl BeaconChain { }; // Collect some metrics. - self.metrics.attestation_production_successes.inc(); - timer.observe_duration(); + metrics::inc_counter(&metrics::ATTESTATION_PRODUCTION_SUCCESSES); + metrics::stop_timer(timer); Ok(AttestationData { beacon_block_root: head_block_root, @@ -547,11 +547,14 @@ impl BeaconChain { &self, attestation: Attestation, ) -> Result { + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_REQUESTS); + let timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_TIMES); + // From the store, load the attestation's "head block". // // An honest validator would have set this block to be the head of the chain (i.e., the // result of running fork choice). - if let Some(attestation_head_block) = self + let result = if let Some(attestation_head_block) = self .store .get::>(&attestation.data.beacon_block_root)? { @@ -657,7 +660,15 @@ impl BeaconChain { Ok(AttestationProcessingOutcome::UnknownHeadBlock { beacon_block_root: attestation.data.beacon_block_root, }) + }; + + metrics::stop_timer(timer); + + if let Ok(AttestationProcessingOutcome::Processed) = &result { + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_SUCCESSES); } + + result } /// Verifies the `attestation` against the `state` to which it is attesting. @@ -684,9 +695,6 @@ impl BeaconChain { state: &BeaconState, block: &BeaconBlock, ) -> Result { - self.metrics.attestation_processing_requests.inc(); - let timer = self.metrics.attestation_processing_times.start_timer(); - // Find the highest between: // // - The highest valid finalized epoch we've ever seen (i.e., the head). @@ -696,7 +704,17 @@ impl BeaconChain { state.finalized_checkpoint.epoch, ); - let result = if block.slot <= finalized_epoch.start_slot(T::EthSpec::slots_per_epoch()) { + // A helper function to allow attestation processing to be metered. + let verify_attestation_for_state = |state, attestation, spec, verify_signatures| { + let timer = metrics::start_timer(&metrics::ATTESTATION_PROCESSING_CORE); + + let result = verify_attestation_for_state(state, attestation, spec, verify_signatures); + + metrics::stop_timer(timer); + result + }; + + if block.slot <= finalized_epoch.start_slot(T::EthSpec::slots_per_epoch()) { // Ignore any attestation where the slot of `data.beacon_block_root` is equal to or // prior to the finalized epoch. // @@ -730,14 +748,10 @@ impl BeaconChain { .insert_attestation(attestation, state, &self.spec)?; // Update the metrics. - self.metrics.attestation_processing_successes.inc(); + metrics::inc_counter(&metrics::ATTESTATION_PROCESSING_SUCCESSES); Ok(AttestationProcessingOutcome::Processed) - }; - - timer.observe_duration(); - - result + } } /// Accept some deposit and queue it for inclusion in an appropriate block. @@ -786,8 +800,8 @@ impl BeaconChain { &self, block: BeaconBlock, ) -> Result { - self.metrics.block_processing_requests.inc(); - let timer = self.metrics.block_processing_times.start_timer(); + metrics::inc_counter(&metrics::BLOCK_PROCESSING_REQUESTS); + let full_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_TIMES); let finalized_slot = self .state @@ -804,8 +818,12 @@ impl BeaconChain { return Ok(BlockProcessingOutcome::GenesisBlock); } + let block_root_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_BLOCK_ROOT); + let block_root = block.canonical_root(); + metrics::stop_timer(block_root_timer); + if block_root == self.genesis_block_root { return Ok(BlockProcessingOutcome::GenesisBlock); } @@ -825,6 +843,10 @@ impl BeaconChain { return Ok(BlockProcessingOutcome::BlockIsAlreadyKnown); } + // Records the time taken to load the block and state from the database during block + // processing. + let db_read_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_READ); + // Load the blocks parent block from the database, returning invalid if that block is not // found. let parent_block: BeaconBlock = match self.store.get(&block.parent_root)? { @@ -844,15 +866,27 @@ impl BeaconChain { .get(&parent_state_root)? .ok_or_else(|| Error::DBInconsistent(format!("Missing state {}", parent_state_root)))?; + metrics::stop_timer(db_read_timer); + + let catchup_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CATCHUP_STATE); + // Transition the parent state to the block slot. let mut state: BeaconState = parent_state; for _ in state.slot.as_u64()..block.slot.as_u64() { per_slot_processing(&mut state, &self.spec)?; } + metrics::stop_timer(catchup_timer); + + let commitee_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_COMMITTEE); + state.build_committee_cache(RelativeEpoch::Previous, &self.spec)?; state.build_committee_cache(RelativeEpoch::Current, &self.spec)?; + metrics::stop_timer(commitee_timer); + + let core_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_CORE); + // Apply the received block to its parent state (which has been transitioned into this // slot). match per_block_processing(&mut state, &block, &self.spec) { @@ -863,16 +897,29 @@ impl BeaconChain { _ => {} } + metrics::stop_timer(core_timer); + + let state_root_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_STATE_ROOT); + let state_root = state.canonical_root(); if block.state_root != state_root { return Ok(BlockProcessingOutcome::StateRootMismatch); } + metrics::stop_timer(state_root_timer); + + let db_write_timer = metrics::start_timer(&metrics::BLOCK_PROCESSING_DB_WRITE); + // Store the block and state. self.store.put(&block_root, &block)?; self.store.put(&state_root, &state)?; + metrics::stop_timer(db_write_timer); + + let fork_choice_register_timer = + metrics::start_timer(&metrics::BLOCK_PROCESSING_FORK_CHOICE_REGISTER); + // Register the new block with the fork choice service. if let Err(e) = self.fork_choice.process_block(&state, &block, block_root) { error!( @@ -884,6 +931,11 @@ impl BeaconChain { ) } + metrics::stop_timer(fork_choice_register_timer); + + let find_head_timer = + metrics::start_timer(&metrics::BLOCK_PROCESSING_FORK_CHOICE_FIND_HEAD); + // Execute the fork choice algorithm, enthroning a new head if discovered. // // Note: in the future we may choose to run fork-choice less often, potentially based upon @@ -896,11 +948,14 @@ impl BeaconChain { ) }; - self.metrics.block_processing_successes.inc(); - self.metrics - .operations_per_block_attestation - .observe(block.body.attestations.len() as f64); - timer.observe_duration(); + metrics::stop_timer(find_head_timer); + + metrics::inc_counter(&metrics::BLOCK_PROCESSING_SUCCESSES); + metrics::observe( + &metrics::OPERATIONS_PER_BLOCK_ATTESTATION, + block.body.attestations.len() as f64, + ); + metrics::stop_timer(full_timer); Ok(BlockProcessingOutcome::Processed { block_root }) } @@ -935,8 +990,8 @@ impl BeaconChain { produce_at_slot: Slot, randao_reveal: Signature, ) -> Result<(BeaconBlock, BeaconState), BlockProductionError> { - self.metrics.block_production_requests.inc(); - let timer = self.metrics.block_production_times.start_timer(); + metrics::inc_counter(&metrics::BLOCK_PRODUCTION_REQUESTS); + let timer = metrics::start_timer(&metrics::BLOCK_PRODUCTION_TIMES); // If required, transition the new state to the present slot. while state.slot < produce_at_slot { @@ -988,28 +1043,25 @@ impl BeaconChain { block.state_root = state_root; - self.metrics.block_production_successes.inc(); - timer.observe_duration(); + metrics::inc_counter(&metrics::BLOCK_PRODUCTION_SUCCESSES); + metrics::stop_timer(timer); Ok((block, state)) } /// Execute the fork choice algorithm and enthrone the result as the canonical head. pub fn fork_choice(&self) -> Result<(), Error> { - self.metrics.fork_choice_requests.inc(); + metrics::inc_counter(&metrics::FORK_CHOICE_REQUESTS); // Start fork choice metrics timer. - let timer = self.metrics.fork_choice_times.start_timer(); + let timer = metrics::start_timer(&metrics::FORK_CHOICE_TIMES); // Determine the root of the block that is the head of the chain. let beacon_block_root = self.fork_choice.find_head(&self)?; - // End fork choice metrics timer. - timer.observe_duration(); - // If a new head was chosen. - if beacon_block_root != self.head().beacon_block_root { - self.metrics.fork_choice_changed_head.inc(); + let result = if beacon_block_root != self.head().beacon_block_root { + metrics::inc_counter(&metrics::FORK_CHOICE_CHANGED_HEAD); let beacon_block: BeaconBlock = self .store @@ -1027,7 +1079,7 @@ impl BeaconChain { // If we switched to a new chain (instead of building atop the present chain). if self.head().beacon_block_root != beacon_block.parent_root { - self.metrics.fork_choice_reorg_count.inc(); + metrics::inc_counter(&metrics::FORK_CHOICE_REORG_COUNT); warn!( self.log, "Beacon chain re-org"; @@ -1071,11 +1123,22 @@ impl BeaconChain { } } else { Ok(()) + }; + + // End fork choice metrics timer. + metrics::stop_timer(timer); + + if let Err(_) = result { + metrics::inc_counter(&metrics::FORK_CHOICE_ERRORS); } + + result } /// Update the canonical head to `new_head`. fn update_canonical_head(&self, new_head: CheckPoint) -> Result<(), Error> { + let timer = metrics::start_timer(&metrics::UPDATE_HEAD_TIMES); + // Update the checkpoint that stores the head of the chain at the time it received the // block. *self.canonical_head.write() = new_head; @@ -1102,6 +1165,8 @@ impl BeaconChain { // Save `self` to `self.store`. self.persist()?; + metrics::stop_timer(timer); + Ok(()) } @@ -1129,6 +1194,13 @@ impl BeaconChain { self.fork_choice .process_finalization(&finalized_block, finalized_block_root)?; + let finalized_state = self + .store + .get::>(&finalized_block.state_root)? + .ok_or_else(|| Error::MissingBeaconState(finalized_block.state_root))?; + + self.op_pool.prune_all(&finalized_state, &self.spec); + Ok(()) } } diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index 7a51fc425..22df90397 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -1,5 +1,4 @@ use crate::fork_choice::Error as ForkChoiceError; -use crate::metrics::Error as MetricsError; use state_processing::per_block_processing::errors::{ AttestationValidationError, IndexedAttestationValidationError, }; @@ -34,7 +33,6 @@ pub enum BeaconChainError { MissingBeaconBlock(Hash256), MissingBeaconState(Hash256), SlotProcessingError(SlotProcessingError), - MetricsError(String), NoStateForAttestation { beacon_block_root: Hash256, }, @@ -44,12 +42,6 @@ pub enum BeaconChainError { easy_from_to!(SlotProcessingError, BeaconChainError); -impl From for BeaconChainError { - fn from(e: MetricsError) -> BeaconChainError { - BeaconChainError::MetricsError(format!("{:?}", e)) - } -} - #[derive(Debug, PartialEq)] pub enum BlockProductionError { UnableToGetBlockRootFromState, diff --git a/beacon_node/beacon_chain/src/fork_choice.rs b/beacon_node/beacon_chain/src/fork_choice.rs index edd426f29..77fdaacdc 100644 --- a/beacon_node/beacon_chain/src/fork_choice.rs +++ b/beacon_node/beacon_chain/src/fork_choice.rs @@ -1,4 +1,4 @@ -use crate::{BeaconChain, BeaconChainTypes}; +use crate::{metrics, BeaconChain, BeaconChainTypes}; use lmd_ghost::LmdGhost; use state_processing::common::get_attesting_indices; use std::sync::Arc; @@ -46,6 +46,8 @@ impl ForkChoice { } pub fn find_head(&self, chain: &BeaconChain) -> Result { + let timer = metrics::start_timer(&metrics::FORK_CHOICE_FIND_HEAD_TIMES); + let start_slot = |epoch: Epoch| epoch.start_slot(T::EthSpec::slots_per_epoch()); // From the specification: @@ -97,9 +99,14 @@ impl ForkChoice { .map(|v| v.effective_balance) }; - self.backend + let result = self + .backend .find_head(start_block_slot, start_block_root, weight) - .map_err(Into::into) + .map_err(Into::into); + + metrics::stop_timer(timer); + + result } /// Process all attestations in the given `block`. @@ -112,6 +119,7 @@ impl ForkChoice { block: &BeaconBlock, block_root: Hash256, ) -> Result<()> { + let timer = metrics::start_timer(&metrics::FORK_CHOICE_PROCESS_BLOCK_TIMES); // Note: we never count the block as a latest message, only attestations. // // I (Paul H) do not have an explicit reference to this, but I derive it from this @@ -136,6 +144,8 @@ impl ForkChoice { // a block that has the majority of votes applied to it. self.backend.process_block(block, block_root)?; + metrics::stop_timer(timer); + Ok(()) } @@ -148,6 +158,8 @@ impl ForkChoice { attestation: &Attestation, block: &BeaconBlock, ) -> Result<()> { + let timer = metrics::start_timer(&metrics::FORK_CHOICE_PROCESS_ATTESTATION_TIMES); + let block_hash = attestation.data.beacon_block_root; // Ignore any attestations to the zero hash. @@ -175,6 +187,8 @@ impl ForkChoice { } } + metrics::stop_timer(timer); + Ok(()) } diff --git a/beacon_node/beacon_chain/src/lib.rs b/beacon_node/beacon_chain/src/lib.rs index 3188760a4..cc7725dd8 100644 --- a/beacon_node/beacon_chain/src/lib.rs +++ b/beacon_node/beacon_chain/src/lib.rs @@ -1,3 +1,7 @@ +#![recursion_limit = "128"] // For lazy-static +#[macro_use] +extern crate lazy_static; + mod beacon_chain; mod checkpoint; mod errors; @@ -13,6 +17,7 @@ pub use self::beacon_chain::{ pub use self::checkpoint::CheckPoint; pub use self::errors::{BeaconChainError, BlockProductionError}; pub use lmd_ghost; +pub use metrics::scrape_for_metrics; pub use parking_lot; pub use slot_clock; pub use state_processing::per_block_processing::errors::{ diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index fa1718ebf..6efa4b3f2 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1,143 +1,276 @@ -pub use prometheus::Error; -use prometheus::{Histogram, HistogramOpts, IntCounter, Opts, Registry}; +use crate::{BeaconChain, BeaconChainTypes}; +pub use lighthouse_metrics::*; +use types::{BeaconState, Epoch, Hash256, Slot}; -pub struct Metrics { - pub block_processing_requests: IntCounter, - pub block_processing_successes: IntCounter, - pub block_processing_times: Histogram, - pub block_production_requests: IntCounter, - pub block_production_successes: IntCounter, - pub block_production_times: Histogram, - pub attestation_production_requests: IntCounter, - pub attestation_production_successes: IntCounter, - pub attestation_production_times: Histogram, - pub attestation_processing_requests: IntCounter, - pub attestation_processing_successes: IntCounter, - pub attestation_processing_times: Histogram, - pub fork_choice_requests: IntCounter, - pub fork_choice_changed_head: IntCounter, - pub fork_choice_reorg_count: IntCounter, - pub fork_choice_times: Histogram, - pub operations_per_block_attestation: Histogram, +lazy_static! { + /* + * Block Processing + */ + pub static ref BLOCK_PROCESSING_REQUESTS: Result = try_create_int_counter( + "beacon_block_processing_requests_total", + "Count of blocks submitted for processing" + ); + pub static ref BLOCK_PROCESSING_SUCCESSES: Result = try_create_int_counter( + "beacon_block_processing_successes_total", + "Count of blocks processed without error" + ); + pub static ref BLOCK_PROCESSING_TIMES: Result = + try_create_histogram("beacon_block_processing_seconds", "Full runtime of block processing"); + pub static ref BLOCK_PROCESSING_BLOCK_ROOT: Result = try_create_histogram( + "beacon_block_processing_block_root_seconds", + "Time spent calculating the block root when processing a block." + ); + pub static ref BLOCK_PROCESSING_DB_READ: Result = try_create_histogram( + "beacon_block_processing_db_read_seconds", + "Time spent loading block and state from DB for block processing" + ); + pub static ref BLOCK_PROCESSING_CATCHUP_STATE: Result = try_create_histogram( + "beacon_block_processing_catch_up_state_seconds", + "Time spent skipping slots on a state before processing a block." + ); + pub static ref BLOCK_PROCESSING_COMMITTEE: Result = try_create_histogram( + "beacon_block_processing_committee_building_seconds", + "Time spent building/obtaining committees for block processing." + ); + pub static ref BLOCK_PROCESSING_CORE: Result = try_create_histogram( + "beacon_block_processing_core_seconds", + "Time spent doing the core per_block_processing state processing." + ); + pub static ref BLOCK_PROCESSING_STATE_ROOT: Result = try_create_histogram( + "beacon_block_processing_state_root_seconds", + "Time spent calculating the state root when processing a block." + ); + pub static ref BLOCK_PROCESSING_DB_WRITE: Result = try_create_histogram( + "beacon_block_processing_db_write_seconds", + "Time spent writing a newly processed block and state to DB" + ); + pub static ref BLOCK_PROCESSING_FORK_CHOICE_REGISTER: Result = try_create_histogram( + "beacon_block_processing_fork_choice_register_seconds", + "Time spent registering the new block with fork choice (but not finding head)" + ); + pub static ref BLOCK_PROCESSING_FORK_CHOICE_FIND_HEAD: Result = try_create_histogram( + "beacon_block_processing_fork_choice_find_head_seconds", + "Time spent finding the new head after processing a new block" + ); + + /* + * Block Production + */ + pub static ref BLOCK_PRODUCTION_REQUESTS: Result = try_create_int_counter( + "beacon_block_production_requests_total", + "Count of all block production requests" + ); + pub static ref BLOCK_PRODUCTION_SUCCESSES: Result = try_create_int_counter( + "beacon_block_production_successes_total", + "Count of blocks successfully produced." + ); + pub static ref BLOCK_PRODUCTION_TIMES: Result = + try_create_histogram("beacon_block_production_seconds", "Full runtime of block production"); + + /* + * Block Statistics + */ + pub static ref OPERATIONS_PER_BLOCK_ATTESTATION: Result = try_create_histogram( + "beacon_operations_per_block_attestation_total", + "Number of attestations in a block" + ); + + /* + * Attestation Processing + */ + pub static ref ATTESTATION_PROCESSING_REQUESTS: Result = try_create_int_counter( + "beacon_attestation_processing_requests_total", + "Count of all attestations submitted for processing" + ); + pub static ref ATTESTATION_PROCESSING_SUCCESSES: Result = try_create_int_counter( + "beacon_attestation_processing_successes_total", + "total_attestation_processing_successes" + ); + pub static ref ATTESTATION_PROCESSING_TIMES: Result = try_create_histogram( + "beacon_attestation_processing_seconds", + "Full runtime of attestation processing" + ); + pub static ref ATTESTATION_PROCESSING_CORE: Result = try_create_histogram( + "beacon_attestation_processing_core_seconds", + "Time spent on the core spec processing of attestation processing" + ); + + /* + * Attestation Production + */ + pub static ref ATTESTATION_PRODUCTION_REQUESTS: Result = try_create_int_counter( + "beacon_attestation_production_requests_total", + "Count of all attestation production requests" + ); + pub static ref ATTESTATION_PRODUCTION_SUCCESSES: Result = try_create_int_counter( + "beacon_attestation_production_successes_total", + "Count of attestations processed without error" + ); + pub static ref ATTESTATION_PRODUCTION_TIMES: Result = try_create_histogram( + "beacon_attestation_production_seconds", + "Full runtime of attestation production" + ); + + /* + * Fork Choice + */ + pub static ref FORK_CHOICE_REQUESTS: Result = try_create_int_counter( + "beacon_fork_choice_requests_total", + "Count of occasions where fork choice has tried to find a head" + ); + pub static ref FORK_CHOICE_ERRORS: Result = try_create_int_counter( + "beacon_fork_choice_errors_total", + "Count of occasions where fork choice has returned an error when trying to find a head" + ); + pub static ref FORK_CHOICE_CHANGED_HEAD: Result = try_create_int_counter( + "beacon_fork_choice_changed_head_total", + "Count of occasions fork choice has found a new head" + ); + pub static ref FORK_CHOICE_REORG_COUNT: Result = try_create_int_counter( + "beacon_fork_choice_reorg_total", + "Count of occasions fork choice has switched to a different chain" + ); + pub static ref FORK_CHOICE_TIMES: Result = + try_create_histogram("beacon_fork_choice_seconds", "Full runtime of fork choice"); + pub static ref FORK_CHOICE_FIND_HEAD_TIMES: Result = + try_create_histogram("beacon_fork_choice_find_head_seconds", "Full runtime of fork choice find_head function"); + pub static ref FORK_CHOICE_PROCESS_BLOCK_TIMES: Result = try_create_histogram( + "beacon_fork_choice_process_block_seconds", + "Time taken to add a block and all attestations to fork choice" + ); + pub static ref FORK_CHOICE_PROCESS_ATTESTATION_TIMES: Result = try_create_histogram( + "beacon_fork_choice_process_attestation_seconds", + "Time taken to add an attestation to fork choice" + ); + + /* + * Persisting BeaconChain to disk + */ + pub static ref PERSIST_CHAIN: Result = + try_create_histogram("beacon_persist_chain", "Time taken to update the canonical head"); + + /* + * Chain Head + */ + pub static ref UPDATE_HEAD_TIMES: Result = + try_create_histogram("beacon_update_head_seconds", "Time taken to update the canonical head"); + pub static ref HEAD_STATE_SLOT: Result = + try_create_int_gauge("beacon_head_state_slot", "Slot of the block at the head of the chain"); + pub static ref HEAD_STATE_ROOT: Result = + try_create_int_gauge("beacon_head_state_root", "Root of the block at the head of the chain"); + pub static ref HEAD_STATE_LATEST_BLOCK_SLOT: Result = + try_create_int_gauge("beacon_head_state_latest_block_slot", "Latest block slot at the head of the chain"); + pub static ref HEAD_STATE_CURRENT_JUSTIFIED_ROOT: Result = + try_create_int_gauge("beacon_head_state_current_justified_root", "Current justified root at the head of the chain"); + pub static ref HEAD_STATE_CURRENT_JUSTIFIED_EPOCH: Result = + try_create_int_gauge("beacon_head_state_current_justified_epoch", "Current justified epoch at the head of the chain"); + pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT: Result = + try_create_int_gauge("beacon_head_state_previous_justified_root", "Previous justified root at the head of the chain"); + pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH: Result = + try_create_int_gauge("beacon_head_state_previous_justified_epoch", "Previous justified epoch at the head of the chain"); + pub static ref HEAD_STATE_FINALIZED_ROOT: Result = + try_create_int_gauge("beacon_head_state_finalized_root", "Finalized root at the head of the chain"); + pub static ref HEAD_STATE_FINALIZED_EPOCH: Result = + try_create_int_gauge("beacon_head_state_finalized_epoch", "Finalized epoch at the head of the chain"); + pub static ref HEAD_STATE_SHARDS: Result = + try_create_int_gauge("beacon_head_state_shard_total", "Count of shards in the beacon chain"); + pub static ref HEAD_STATE_TOTAL_VALIDATORS: Result = + try_create_int_gauge("beacon_head_state_total_validators_total", "Count of validators at the head of the chain"); + pub static ref HEAD_STATE_ACTIVE_VALIDATORS: Result = + try_create_int_gauge("beacon_head_state_active_validators_total", "Count of active validators at the head of the chain"); + pub static ref HEAD_STATE_VALIDATOR_BALANCES: Result = + try_create_int_gauge("beacon_head_state_validator_balances_total", "Sum of all validator balances at the head of the chain"); + pub static ref HEAD_STATE_SLASHED_VALIDATORS: Result = + try_create_int_gauge("beacon_head_state_slashed_validators_total", "Count of all slashed validators at the head of the chain"); + pub static ref HEAD_STATE_WITHDRAWN_VALIDATORS: Result = + try_create_int_gauge("beacon_head_state_withdrawn_validators_total", "Sum of all validator balances at the head of the chain"); + pub static ref HEAD_STATE_ETH1_DEPOSIT_INDEX: Result = + try_create_int_gauge("beacon_head_state_eth1_deposit_index", "Eth1 deposit index at the head of the chain"); } -impl Metrics { - pub fn new() -> Result { - Ok(Self { - block_processing_requests: { - let opts = Opts::new("block_processing_requests", "total_blocks_processed"); - IntCounter::with_opts(opts)? - }, - block_processing_successes: { - let opts = Opts::new("block_processing_successes", "total_valid_blocks_processed"); - IntCounter::with_opts(opts)? - }, - block_processing_times: { - let opts = HistogramOpts::new("block_processing_times", "block_processing_time"); - Histogram::with_opts(opts)? - }, - block_production_requests: { - let opts = Opts::new("block_production_requests", "attempts_to_produce_new_block"); - IntCounter::with_opts(opts)? - }, - block_production_successes: { - let opts = Opts::new("block_production_successes", "blocks_successfully_produced"); - IntCounter::with_opts(opts)? - }, - block_production_times: { - let opts = HistogramOpts::new("block_production_times", "block_production_time"); - Histogram::with_opts(opts)? - }, - attestation_production_requests: { - let opts = Opts::new( - "attestation_production_requests", - "total_attestation_production_requests", - ); - IntCounter::with_opts(opts)? - }, - attestation_production_successes: { - let opts = Opts::new( - "attestation_production_successes", - "total_attestation_production_successes", - ); - IntCounter::with_opts(opts)? - }, - attestation_production_times: { - let opts = HistogramOpts::new( - "attestation_production_times", - "attestation_production_time", - ); - Histogram::with_opts(opts)? - }, - attestation_processing_requests: { - let opts = Opts::new( - "attestation_processing_requests", - "total_attestation_processing_requests", - ); - IntCounter::with_opts(opts)? - }, - attestation_processing_successes: { - let opts = Opts::new( - "attestation_processing_successes", - "total_attestation_processing_successes", - ); - IntCounter::with_opts(opts)? - }, - attestation_processing_times: { - let opts = HistogramOpts::new( - "attestation_processing_times", - "attestation_processing_time", - ); - Histogram::with_opts(opts)? - }, - fork_choice_requests: { - let opts = Opts::new("fork_choice_requests", "total_times_fork_choice_called"); - IntCounter::with_opts(opts)? - }, - fork_choice_changed_head: { - let opts = Opts::new( - "fork_choice_changed_head", - "total_times_fork_choice_chose_a_new_head", - ); - IntCounter::with_opts(opts)? - }, - fork_choice_reorg_count: { - let opts = Opts::new("fork_choice_reorg_count", "number_of_reorgs"); - IntCounter::with_opts(opts)? - }, - fork_choice_times: { - let opts = HistogramOpts::new("fork_choice_time", "total_time_to_run_fork_choice"); - Histogram::with_opts(opts)? - }, - operations_per_block_attestation: { - let opts = HistogramOpts::new( - "operations_per_block_attestation", - "count_of_attestations_per_block", - ); - Histogram::with_opts(opts)? - }, - }) - } - - pub fn register(&self, registry: &Registry) -> Result<(), Error> { - registry.register(Box::new(self.block_processing_requests.clone()))?; - registry.register(Box::new(self.block_processing_successes.clone()))?; - registry.register(Box::new(self.block_processing_times.clone()))?; - registry.register(Box::new(self.block_production_requests.clone()))?; - registry.register(Box::new(self.block_production_successes.clone()))?; - registry.register(Box::new(self.block_production_times.clone()))?; - registry.register(Box::new(self.attestation_production_requests.clone()))?; - registry.register(Box::new(self.attestation_production_successes.clone()))?; - registry.register(Box::new(self.attestation_production_times.clone()))?; - registry.register(Box::new(self.attestation_processing_requests.clone()))?; - registry.register(Box::new(self.attestation_processing_successes.clone()))?; - registry.register(Box::new(self.attestation_processing_times.clone()))?; - registry.register(Box::new(self.fork_choice_requests.clone()))?; - registry.register(Box::new(self.fork_choice_changed_head.clone()))?; - registry.register(Box::new(self.fork_choice_reorg_count.clone()))?; - registry.register(Box::new(self.fork_choice_times.clone()))?; - registry.register(Box::new(self.operations_per_block_attestation.clone()))?; - - Ok(()) - } +/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot, +/// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`. +pub fn scrape_for_metrics(beacon_chain: &BeaconChain) { + scrape_head_state::( + &beacon_chain.head().beacon_state, + beacon_chain.head().beacon_state_root, + ); +} + +/// Scrape the given `state` assuming it's the head state, updating the `DEFAULT_REGISTRY`. +fn scrape_head_state(state: &BeaconState, state_root: Hash256) { + set_gauge_by_slot(&HEAD_STATE_SLOT, state.slot); + set_gauge_by_hash(&HEAD_STATE_ROOT, state_root); + set_gauge_by_slot( + &HEAD_STATE_LATEST_BLOCK_SLOT, + state.latest_block_header.slot, + ); + set_gauge_by_hash( + &HEAD_STATE_CURRENT_JUSTIFIED_ROOT, + state.current_justified_checkpoint.root, + ); + set_gauge_by_epoch( + &HEAD_STATE_CURRENT_JUSTIFIED_EPOCH, + state.current_justified_checkpoint.epoch, + ); + set_gauge_by_hash( + &HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT, + state.previous_justified_checkpoint.root, + ); + set_gauge_by_epoch( + &HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH, + state.previous_justified_checkpoint.epoch, + ); + set_gauge_by_hash(&HEAD_STATE_FINALIZED_ROOT, state.finalized_checkpoint.root); + set_gauge_by_epoch( + &HEAD_STATE_FINALIZED_EPOCH, + state.finalized_checkpoint.epoch, + ); + set_gauge_by_usize(&HEAD_STATE_SHARDS, state.previous_crosslinks.len()); + set_gauge_by_usize(&HEAD_STATE_TOTAL_VALIDATORS, state.validators.len()); + set_gauge_by_u64( + &HEAD_STATE_VALIDATOR_BALANCES, + state.balances.iter().fold(0_u64, |acc, i| acc + i), + ); + set_gauge_by_usize( + &HEAD_STATE_ACTIVE_VALIDATORS, + state + .validators + .iter() + .filter(|v| v.is_active_at(state.current_epoch())) + .count(), + ); + set_gauge_by_usize( + &HEAD_STATE_SLASHED_VALIDATORS, + state.validators.iter().filter(|v| v.slashed).count(), + ); + set_gauge_by_usize( + &HEAD_STATE_WITHDRAWN_VALIDATORS, + state + .validators + .iter() + .filter(|v| v.is_withdrawable_at(state.current_epoch())) + .count(), + ); + set_gauge_by_u64(&HEAD_STATE_ETH1_DEPOSIT_INDEX, state.eth1_deposit_index); +} + +fn set_gauge_by_slot(gauge: &Result, value: Slot) { + set_gauge(gauge, value.as_u64() as i64); +} + +fn set_gauge_by_epoch(gauge: &Result, value: Epoch) { + set_gauge(gauge, value.as_u64() as i64); +} + +fn set_gauge_by_hash(gauge: &Result, value: Hash256) { + set_gauge(gauge, value.to_low_u64_le() as i64); +} + +fn set_gauge_by_usize(gauge: &Result, value: usize) { + set_gauge(gauge, value as i64); +} + +fn set_gauge_by_u64(gauge: &Result, value: u64) { + set_gauge(gauge, value as i64); } diff --git a/beacon_node/client/Cargo.toml b/beacon_node/client/Cargo.toml index b0524b17d..9aa3557a9 100644 --- a/beacon_node/client/Cargo.toml +++ b/beacon_node/client/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [dependencies] beacon_chain = { path = "../beacon_chain" } network = { path = "../network" } -http_server = { path = "../http_server" } rpc = { path = "../rpc" } rest_api = { path = "../rest_api" } prometheus = "^0.6" diff --git a/beacon_node/client/src/config.rs b/beacon_node/client/src/config.rs index 2b410312b..0d5d5f81d 100644 --- a/beacon_node/client/src/config.rs +++ b/beacon_node/client/src/config.rs @@ -1,6 +1,5 @@ use crate::Eth2Config; use clap::ArgMatches; -use http_server::HttpServerConfig; use network::NetworkConfig; use serde_derive::{Deserialize, Serialize}; use slog::{info, o, Drain}; @@ -25,7 +24,6 @@ pub struct Config { pub genesis_state: GenesisState, pub network: network::NetworkConfig, pub rpc: rpc::RPCConfig, - pub http: HttpServerConfig, pub rest_api: rest_api::ApiConfig, } @@ -61,7 +59,6 @@ impl Default for Config { db_name: "chain_db".to_string(), network: NetworkConfig::new(), rpc: rpc::RPCConfig::default(), - http: HttpServerConfig::default(), rest_api: rest_api::ApiConfig::default(), spec_constants: TESTNET_SPEC_CONSTANTS.into(), genesis_state: GenesisState::RecentGenesis { @@ -145,7 +142,6 @@ impl Config { self.network.apply_cli_args(args)?; self.rpc.apply_cli_args(args)?; - self.http.apply_cli_args(args)?; self.rest_api.apply_cli_args(args)?; if let Some(log_file) = args.value_of("logfile") { diff --git a/beacon_node/client/src/lib.rs b/beacon_node/client/src/lib.rs index 7a9152ee0..e7c3d2d8a 100644 --- a/beacon_node/client/src/lib.rs +++ b/beacon_node/client/src/lib.rs @@ -11,7 +11,6 @@ use beacon_chain::BeaconChain; use exit_future::Signal; use futures::{future::Future, Stream}; use network::Service as NetworkService; -use prometheus::Registry; use slog::{error, info, o}; use slot_clock::SlotClock; use std::marker::PhantomData; @@ -37,8 +36,6 @@ pub struct Client { pub network: Arc>, /// Signal to terminate the RPC server. pub rpc_exit_signal: Option, - /// Signal to terminate the HTTP server. - pub http_exit_signal: Option, /// Signal to terminate the slot timer. pub slot_timer_exit_signal: Option, /// Signal to terminate the API @@ -61,7 +58,6 @@ where log: slog::Logger, executor: &TaskExecutor, ) -> error::Result { - let metrics_registry = Registry::new(); let store = Arc::new(store); let seconds_per_slot = eth2_config.spec.seconds_per_slot; @@ -72,11 +68,6 @@ where eth2_config.spec.clone(), log.clone(), )?); - // Registry all beacon chain metrics with the global registry. - beacon_chain - .metrics - .register(&metrics_registry) - .expect("Failed to registry metrics"); if beacon_chain.read_slot_clock().is_none() { panic!("Cannot start client before genesis!") @@ -125,29 +116,13 @@ where None }; - // Start the `http_server` service. - // - // Note: presently we are ignoring the config and _always_ starting a HTTP server. - let http_exit_signal = if client_config.http.enabled { - Some(http_server::start_service( - &client_config.http, - executor, - network_send, - beacon_chain.clone(), - client_config.db_path().expect("unable to read datadir"), - metrics_registry, - &log, - )) - } else { - None - }; - // Start the `rest_api` service let api_exit_signal = if client_config.rest_api.enabled { match rest_api::start_server( &client_config.rest_api, executor, beacon_chain.clone(), + client_config.db_path().expect("unable to read datadir"), &log, ) { Ok(s) => Some(s), @@ -189,7 +164,6 @@ where Ok(Client { _client_config: client_config, beacon_chain, - http_exit_signal, rpc_exit_signal, slot_timer_exit_signal: Some(slot_timer_exit_signal), api_exit_signal, diff --git a/beacon_node/eth2-libp2p/Cargo.toml b/beacon_node/eth2-libp2p/Cargo.toml index 794b09712..006b895a1 100644 --- a/beacon_node/eth2-libp2p/Cargo.toml +++ b/beacon_node/eth2-libp2p/Cargo.toml @@ -26,3 +26,5 @@ smallvec = "0.6.10" fnv = "1.0.6" unsigned-varint = "0.2.2" bytes = "0.4.12" +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } diff --git a/beacon_node/eth2-libp2p/src/discovery.rs b/beacon_node/eth2-libp2p/src/discovery.rs index 4c1794945..ca98db324 100644 --- a/beacon_node/eth2-libp2p/src/discovery.rs +++ b/beacon_node/eth2-libp2p/src/discovery.rs @@ -1,3 +1,4 @@ +use crate::metrics; use crate::{error, NetworkConfig}; /// This manages the discovery and management of peers. /// @@ -159,10 +160,16 @@ where fn inject_connected(&mut self, peer_id: PeerId, _endpoint: ConnectedPoint) { self.connected_peers.insert(peer_id); + + metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT); + metrics::set_gauge(&metrics::PEERS_CONNECTED, self.connected_peers() as i64); } fn inject_disconnected(&mut self, peer_id: &PeerId, _endpoint: ConnectedPoint) { self.connected_peers.remove(peer_id); + + metrics::inc_counter(&metrics::PEER_DISCONNECT_EVENT_COUNT); + metrics::set_gauge(&metrics::PEERS_CONNECTED, self.connected_peers() as i64); } fn inject_replaced( @@ -217,6 +224,7 @@ where } Discv5Event::SocketUpdated(socket) => { info!(self.log, "Address updated"; "IP" => format!("{}",socket.ip())); + metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT); let mut address = Multiaddr::from(socket.ip()); address.push(Protocol::Tcp(self.tcp_port)); let enr = self.discovery.local_enr(); diff --git a/beacon_node/eth2-libp2p/src/lib.rs b/beacon_node/eth2-libp2p/src/lib.rs index 54a4f2a99..33d5ba9ed 100644 --- a/beacon_node/eth2-libp2p/src/lib.rs +++ b/beacon_node/eth2-libp2p/src/lib.rs @@ -2,10 +2,14 @@ /// all required libp2p functionality. /// /// This crate builds and manages the libp2p services required by the beacon node. +#[macro_use] +extern crate lazy_static; + pub mod behaviour; mod config; mod discovery; pub mod error; +mod metrics; pub mod rpc; mod service; diff --git a/beacon_node/eth2-libp2p/src/metrics.rs b/beacon_node/eth2-libp2p/src/metrics.rs new file mode 100644 index 000000000..b678ef6b4 --- /dev/null +++ b/beacon_node/eth2-libp2p/src/metrics.rs @@ -0,0 +1,20 @@ +pub use lighthouse_metrics::*; + +lazy_static! { + pub static ref ADDRESS_UPDATE_COUNT: Result = try_create_int_counter( + "libp2p_address_update_total", + "Count of libp2p socked updated events (when our view of our IP address has changed)" + ); + pub static ref PEERS_CONNECTED: Result = try_create_int_gauge( + "libp2p_peer_connected_peers_total", + "Count of libp2p peers currently connected" + ); + pub static ref PEER_CONNECT_EVENT_COUNT: Result = try_create_int_counter( + "libp2p_peer_connect_event_total", + "Count of libp2p peer connect events (not the current number of connected peers)" + ); + pub static ref PEER_DISCONNECT_EVENT_COUNT: Result = try_create_int_counter( + "libp2p_peer_disconnect_event_total", + "Count of libp2p peer disconnect events" + ); +} diff --git a/beacon_node/http_server/Cargo.toml b/beacon_node/http_server/Cargo.toml deleted file mode 100644 index e87ff2997..000000000 --- a/beacon_node/http_server/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "http_server" -version = "0.1.0" -authors = ["Paul Hauner "] -edition = "2018" - -[dependencies] -beacon_chain = { path = "../beacon_chain" } -iron = "^0.6" -router = "^0.6" -network = { path = "../network" } -types = { path = "../../eth2/types" } -slot_clock = { path = "../../eth2/utils/slot_clock" } -persistent = "^0.4" -prometheus = { version = "^0.6", features = ["process"] } -clap = "2.32.0" -futures = "0.1.23" -serde = "1.0" -serde_derive = "1.0" -serde_json = "1.0" -slog = { version = "^2.2.3" , features = ["max_level_trace"] } -tokio = "0.1.17" -exit-future = "0.1.4" diff --git a/beacon_node/http_server/src/api.rs b/beacon_node/http_server/src/api.rs deleted file mode 100644 index 8cb023b02..000000000 --- a/beacon_node/http_server/src/api.rs +++ /dev/null @@ -1,71 +0,0 @@ -use crate::{key::BeaconChainKey, map_persistent_err_to_500}; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use iron::prelude::*; -use iron::{ - headers::{CacheControl, CacheDirective, ContentType}, - status::Status, - AfterMiddleware, Handler, IronResult, Request, Response, -}; -use persistent::Read; -use router::Router; -use serde_json::json; -use std::sync::Arc; - -/// Yields a handler for the HTTP API. -pub fn build_handler( - beacon_chain: Arc>, -) -> impl Handler { - let mut router = Router::new(); - - router.get("/node/fork", handle_fork::, "fork"); - - let mut chain = Chain::new(router); - - // Insert `BeaconChain` so it may be accessed in a request. - chain.link(Read::>::both(beacon_chain.clone())); - // Set the content-type headers. - chain.link_after(SetJsonContentType); - // Set the cache headers. - chain.link_after(SetCacheDirectives); - - chain -} - -/// Sets the `cache-control` headers on _all_ responses, unless they are already set. -struct SetCacheDirectives; -impl AfterMiddleware for SetCacheDirectives { - fn after(&self, _req: &mut Request, mut resp: Response) -> IronResult { - // This is run for every requests, AFTER all handlers have been executed - if resp.headers.get::() == None { - resp.headers.set(CacheControl(vec![ - CacheDirective::NoCache, - CacheDirective::NoStore, - ])); - } - Ok(resp) - } -} - -/// Sets the `content-type` headers on _all_ responses, unless they are already set. -struct SetJsonContentType; -impl AfterMiddleware for SetJsonContentType { - fn after(&self, _req: &mut Request, mut resp: Response) -> IronResult { - if resp.headers.get::() == None { - resp.headers.set(ContentType::json()); - } - Ok(resp) - } -} - -fn handle_fork(req: &mut Request) -> IronResult { - let beacon_chain = req - .get::>>() - .map_err(map_persistent_err_to_500)?; - - let response = json!({ - "fork": beacon_chain.head().beacon_state.fork, - "network_id": beacon_chain.spec.network_id - }); - - Ok(Response::with((Status::Ok, response.to_string()))) -} diff --git a/beacon_node/http_server/src/key.rs b/beacon_node/http_server/src/key.rs deleted file mode 100644 index a69da6747..000000000 --- a/beacon_node/http_server/src/key.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::metrics::LocalMetrics; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use iron::typemap::Key; -use prometheus::Registry; -use std::marker::PhantomData; -use std::path::PathBuf; -use std::sync::Arc; - -pub struct BeaconChainKey { - _phantom: PhantomData, -} - -impl Key for BeaconChainKey { - type Value = Arc>; -} - -pub struct MetricsRegistryKey; - -impl Key for MetricsRegistryKey { - type Value = Registry; -} - -pub struct LocalMetricsKey; - -impl Key for LocalMetricsKey { - type Value = LocalMetrics; -} - -pub struct DBPathKey; - -impl Key for DBPathKey { - type Value = PathBuf; -} diff --git a/beacon_node/http_server/src/lib.rs b/beacon_node/http_server/src/lib.rs deleted file mode 100644 index f1d006a5b..000000000 --- a/beacon_node/http_server/src/lib.rs +++ /dev/null @@ -1,145 +0,0 @@ -mod api; -mod key; -mod metrics; - -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use clap::ArgMatches; -use futures::Future; -use iron::prelude::*; -use network::NetworkMessage; -use prometheus::Registry; -use router::Router; -use serde_derive::{Deserialize, Serialize}; -use slog::{info, o, warn}; -use std::path::PathBuf; -use std::sync::Arc; -use tokio::runtime::TaskExecutor; -use tokio::sync::mpsc; - -#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] -pub struct HttpServerConfig { - pub enabled: bool, - pub listen_address: String, - pub listen_port: String, -} - -impl Default for HttpServerConfig { - fn default() -> Self { - Self { - enabled: false, - listen_address: "127.0.0.1".to_string(), - listen_port: "5052".to_string(), - } - } -} - -impl HttpServerConfig { - pub fn apply_cli_args(&mut self, args: &ArgMatches) -> Result<(), &'static str> { - if args.is_present("http") { - self.enabled = true; - } - - if let Some(listen_address) = args.value_of("http-address") { - self.listen_address = listen_address.to_string(); - } - - if let Some(listen_port) = args.value_of("http-port") { - self.listen_port = listen_port.to_string(); - } - - Ok(()) - } -} - -/// Build the `iron` HTTP server, defining the core routes. -pub fn create_iron_http_server( - beacon_chain: Arc>, - db_path: PathBuf, - metrics_registry: Registry, -) -> Iron { - let mut router = Router::new(); - - // A `GET` request to `/metrics` is handled by the `metrics` module. - router.get( - "/metrics", - metrics::build_handler(beacon_chain.clone(), db_path, metrics_registry), - "metrics", - ); - - // Any request to all other endpoints is handled by the `api` module. - router.any("/*", api::build_handler(beacon_chain.clone()), "api"); - - Iron::new(router) -} - -/// Start the HTTP service on the tokio `TaskExecutor`. -pub fn start_service( - config: &HttpServerConfig, - executor: &TaskExecutor, - _network_chan: mpsc::UnboundedSender, - beacon_chain: Arc>, - db_path: PathBuf, - metrics_registry: Registry, - log: &slog::Logger, -) -> exit_future::Signal { - let log = log.new(o!("Service"=>"HTTP")); - - // Create: - // - `shutdown_trigger` a one-shot to shut down this service. - // - `wait_for_shutdown` a future that will wait until someone calls shutdown. - let (shutdown_trigger, wait_for_shutdown) = exit_future::signal(); - - // Create an `iron` http, without starting it yet. - let iron = create_iron_http_server(beacon_chain, db_path, metrics_registry); - - // Create a HTTP server future. - // - // 1. Start the HTTP server - // 2. Build an exit future that will shutdown the server when requested. - // 3. Return the exit future, so the caller may shutdown the service when desired. - let http_service = { - let listen_address = format!("{}:{}", config.listen_address, config.listen_port); - // Start the HTTP server - let server_start_result = iron.http(listen_address.clone()); - - if server_start_result.is_ok() { - info!(log, "HTTP server running on {}", listen_address); - } else { - warn!(log, "HTTP server failed to start on {}", listen_address); - } - - // Build a future that will shutdown the HTTP server when the `shutdown_trigger` is - // triggered. - wait_for_shutdown.and_then(move |_| { - info!(log, "HTTP server shutting down"); - - if let Ok(mut server) = server_start_result { - // According to the documentation, `server.close()` "doesn't work" and the server - // keeps listening. - // - // It is being called anyway, because it seems like the right thing to do. If you - // know this has negative side-effects, please create an issue to discuss. - // - // See: https://docs.rs/iron/0.6.0/iron/struct.Listening.html#impl - match server.close() { - _ => (), - }; - } - info!(log, "HTTP server shutdown complete."); - Ok(()) - }) - }; - - // Attach the HTTP server to the executor. - executor.spawn(http_service); - - shutdown_trigger -} - -/// Helper function for mapping a failure to read state to a 500 server error. -fn map_persistent_err_to_500(e: persistent::PersistentError) -> iron::error::IronError { - iron::error::IronError { - error: Box::new(e), - response: iron::Response::with(iron::status::Status::InternalServerError), - } -} diff --git a/beacon_node/http_server/src/metrics.rs b/beacon_node/http_server/src/metrics.rs deleted file mode 100644 index 1b1ed1f3d..000000000 --- a/beacon_node/http_server/src/metrics.rs +++ /dev/null @@ -1,72 +0,0 @@ -use crate::{ - key::{BeaconChainKey, DBPathKey, LocalMetricsKey, MetricsRegistryKey}, - map_persistent_err_to_500, -}; -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use iron::prelude::*; -use iron::{status::Status, Handler, IronResult, Request, Response}; -use persistent::Read; -use prometheus::{Encoder, Registry, TextEncoder}; -use std::path::PathBuf; -use std::sync::Arc; - -pub use local_metrics::LocalMetrics; - -mod local_metrics; - -/// Yields a handler for the metrics endpoint. -pub fn build_handler( - beacon_chain: Arc>, - db_path: PathBuf, - metrics_registry: Registry, -) -> impl Handler { - let mut chain = Chain::new(handle_metrics::); - - let local_metrics = LocalMetrics::new().unwrap(); - local_metrics.register(&metrics_registry).unwrap(); - - chain.link(Read::>::both(beacon_chain)); - chain.link(Read::::both(metrics_registry)); - chain.link(Read::::both(local_metrics)); - chain.link(Read::::both(db_path)); - - chain -} - -/// Handle a request for Prometheus metrics. -/// -/// Returns a text string containing all metrics. -fn handle_metrics(req: &mut Request) -> IronResult { - let beacon_chain = req - .get::>>() - .map_err(map_persistent_err_to_500)?; - - let r = req - .get::>() - .map_err(map_persistent_err_to_500)?; - - let local_metrics = req - .get::>() - .map_err(map_persistent_err_to_500)?; - - let db_path = req - .get::>() - .map_err(map_persistent_err_to_500)?; - - // Update metrics that are calculated on each scrape. - local_metrics.update(&beacon_chain, &db_path); - - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - - // Gather `DEFAULT_REGISTRY` metrics. - encoder.encode(&prometheus::gather(), &mut buffer).unwrap(); - - // Gather metrics from our registry. - let metric_families = r.gather(); - encoder.encode(&metric_families, &mut buffer).unwrap(); - - let prom_string = String::from_utf8(buffer).unwrap(); - - Ok(Response::with((Status::Ok, prom_string))) -} diff --git a/beacon_node/http_server/src/metrics/local_metrics.rs b/beacon_node/http_server/src/metrics/local_metrics.rs deleted file mode 100644 index b342cca81..000000000 --- a/beacon_node/http_server/src/metrics/local_metrics.rs +++ /dev/null @@ -1,154 +0,0 @@ -use beacon_chain::{BeaconChain, BeaconChainTypes}; -use prometheus::{IntGauge, Opts, Registry}; -use slot_clock::SlotClock; -use std::fs; -use std::path::PathBuf; -use types::{EthSpec, Slot}; - -// If set to `true` will iterate and sum the balances of all validators in the state for each -// scrape. -const SHOULD_SUM_VALIDATOR_BALANCES: bool = true; - -pub struct LocalMetrics { - present_slot: IntGauge, - present_epoch: IntGauge, - best_slot: IntGauge, - best_beacon_block_root: IntGauge, - justified_beacon_block_root: IntGauge, - finalized_beacon_block_root: IntGauge, - validator_count: IntGauge, - justified_epoch: IntGauge, - finalized_epoch: IntGauge, - validator_balances_sum: IntGauge, - database_size: IntGauge, -} - -impl LocalMetrics { - /// Create a new instance. - pub fn new() -> Result { - Ok(Self { - present_slot: { - let opts = Opts::new("present_slot", "slot_at_time_of_scrape"); - IntGauge::with_opts(opts)? - }, - present_epoch: { - let opts = Opts::new("present_epoch", "epoch_at_time_of_scrape"); - IntGauge::with_opts(opts)? - }, - best_slot: { - let opts = Opts::new("best_slot", "slot_of_block_at_chain_head"); - IntGauge::with_opts(opts)? - }, - best_beacon_block_root: { - let opts = Opts::new("best_beacon_block_root", "root_of_block_at_chain_head"); - IntGauge::with_opts(opts)? - }, - justified_beacon_block_root: { - let opts = Opts::new( - "justified_beacon_block_root", - "root_of_block_at_justified_head", - ); - IntGauge::with_opts(opts)? - }, - finalized_beacon_block_root: { - let opts = Opts::new( - "finalized_beacon_block_root", - "root_of_block_at_finalized_head", - ); - IntGauge::with_opts(opts)? - }, - validator_count: { - let opts = Opts::new("validator_count", "number_of_validators"); - IntGauge::with_opts(opts)? - }, - justified_epoch: { - let opts = Opts::new("justified_epoch", "state_justified_epoch"); - IntGauge::with_opts(opts)? - }, - finalized_epoch: { - let opts = Opts::new("finalized_epoch", "state_finalized_epoch"); - IntGauge::with_opts(opts)? - }, - validator_balances_sum: { - let opts = Opts::new("validator_balances_sum", "sum_of_all_validator_balances"); - IntGauge::with_opts(opts)? - }, - database_size: { - let opts = Opts::new("database_size", "size_of_on_disk_db_in_mb"); - IntGauge::with_opts(opts)? - }, - }) - } - - /// Registry this instance with the `registry`. - pub fn register(&self, registry: &Registry) -> Result<(), prometheus::Error> { - registry.register(Box::new(self.present_slot.clone()))?; - registry.register(Box::new(self.present_epoch.clone()))?; - registry.register(Box::new(self.best_slot.clone()))?; - registry.register(Box::new(self.best_beacon_block_root.clone()))?; - registry.register(Box::new(self.justified_beacon_block_root.clone()))?; - registry.register(Box::new(self.finalized_beacon_block_root.clone()))?; - registry.register(Box::new(self.validator_count.clone()))?; - registry.register(Box::new(self.finalized_epoch.clone()))?; - registry.register(Box::new(self.justified_epoch.clone()))?; - registry.register(Box::new(self.validator_balances_sum.clone()))?; - registry.register(Box::new(self.database_size.clone()))?; - - Ok(()) - } - - /// Update the metrics in `self` to the latest values. - pub fn update(&self, beacon_chain: &BeaconChain, db_path: &PathBuf) { - let state = &beacon_chain.head().beacon_state; - - let present_slot = beacon_chain - .slot_clock - .present_slot() - .unwrap_or_else(|_| None) - .unwrap_or_else(|| Slot::new(0)); - self.present_slot.set(present_slot.as_u64() as i64); - self.present_epoch - .set(present_slot.epoch(T::EthSpec::slots_per_epoch()).as_u64() as i64); - - self.best_slot.set(state.slot.as_u64() as i64); - self.best_beacon_block_root - .set(beacon_chain.head().beacon_block_root.to_low_u64_le() as i64); - self.justified_beacon_block_root.set( - beacon_chain - .head() - .beacon_state - .current_justified_checkpoint - .root - .to_low_u64_le() as i64, - ); - self.finalized_beacon_block_root.set( - beacon_chain - .head() - .beacon_state - .finalized_checkpoint - .root - .to_low_u64_le() as i64, - ); - self.validator_count.set(state.validators.len() as i64); - self.justified_epoch - .set(state.current_justified_checkpoint.epoch.as_u64() as i64); - self.finalized_epoch - .set(state.finalized_checkpoint.epoch.as_u64() as i64); - if SHOULD_SUM_VALIDATOR_BALANCES { - self.validator_balances_sum - .set(state.balances.iter().sum::() as i64); - } - let db_size = if let Ok(iter) = fs::read_dir(db_path) { - iter.filter_map(Result::ok) - .map(size_of_dir_entry) - .fold(0_u64, |sum, val| sum + val) - } else { - 0 - }; - self.database_size.set(db_size as i64); - } -} - -fn size_of_dir_entry(dir: fs::DirEntry) -> u64 { - dir.metadata().map(|m| m.len()).unwrap_or(0) -} diff --git a/beacon_node/rest_api/Cargo.toml b/beacon_node/rest_api/Cargo.toml index fb6cb8413..c7026014c 100644 --- a/beacon_node/rest_api/Cargo.toml +++ b/beacon_node/rest_api/Cargo.toml @@ -18,8 +18,12 @@ state_processing = { path = "../../eth2/state_processing" } types = { path = "../../eth2/types" } clap = "2.32.0" http = "^0.1.17" +prometheus = { version = "^0.6", features = ["process"] } hyper = "0.12.32" futures = "0.1" exit-future = "0.1.3" tokio = "0.1.17" url = "2.0" +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } +slot_clock = { path = "../../eth2/utils/slot_clock" } diff --git a/beacon_node/rest_api/src/config.rs b/beacon_node/rest_api/src/config.rs index c4a9c738a..90ac0821b 100644 --- a/beacon_node/rest_api/src/config.rs +++ b/beacon_node/rest_api/src/config.rs @@ -18,7 +18,7 @@ impl Default for Config { Config { enabled: true, // rest_api enabled by default listen_address: Ipv4Addr::new(127, 0, 0, 1), - port: 1248, + port: 5052, } } } diff --git a/beacon_node/rest_api/src/lib.rs b/beacon_node/rest_api/src/lib.rs index 4f07b482a..e267ce313 100644 --- a/beacon_node/rest_api/src/lib.rs +++ b/beacon_node/rest_api/src/lib.rs @@ -1,8 +1,10 @@ -extern crate futures; -extern crate hyper; +#[macro_use] +extern crate lazy_static; + mod beacon; mod config; mod helpers; +mod metrics; mod node; mod spec; mod url_query; @@ -13,6 +15,8 @@ use hyper::rt::Future; use hyper::service::service_fn_ok; use hyper::{Body, Method, Response, Server, StatusCode}; use slog::{info, o, warn}; +use std::ops::Deref; +use std::path::PathBuf; use std::sync::Arc; use tokio::runtime::TaskExecutor; use url_query::UrlQuery; @@ -68,6 +72,7 @@ pub fn start_server( config: &ApiConfig, executor: &TaskExecutor, beacon_chain: Arc>, + db_path: PathBuf, log: &slog::Logger, ) -> Result { let log = log.new(o!("Service" => "Api")); @@ -81,6 +86,8 @@ pub fn start_server( Ok(()) }); + let db_path = DBPath(db_path); + // Get the address to bind to let bind_addr = (config.listen_address, config.port).into(); @@ -91,12 +98,17 @@ pub fn start_server( let service = move || { let log = server_log.clone(); let beacon_chain = server_bc.clone(); + let db_path = db_path.clone(); // Create a simple handler for the router, inject our stateful objects into the request. service_fn_ok(move |mut req| { + metrics::inc_counter(&metrics::REQUEST_COUNT); + let timer = metrics::start_timer(&metrics::REQUEST_RESPONSE_TIME); + req.extensions_mut().insert::(log.clone()); req.extensions_mut() .insert::>>(beacon_chain.clone()); + req.extensions_mut().insert::(db_path.clone()); let path = req.uri().path().to_string(); @@ -109,6 +121,7 @@ pub fn start_server( } (&Method::GET, "/beacon/state") => beacon::get_state::(req), (&Method::GET, "/beacon/state_root") => beacon::get_state_root::(req), + (&Method::GET, "/metrics") => metrics::get_prometheus::(req), (&Method::GET, "/node/version") => node::get_version(req), (&Method::GET, "/node/genesis_time") => node::get_genesis_time::(req), (&Method::GET, "/spec") => spec::get_spec::(req), @@ -116,9 +129,10 @@ pub fn start_server( _ => Err(ApiError::MethodNotAllowed(path.clone())), }; - match result { + let response = match result { // Return the `hyper::Response`. Ok(response) => { + metrics::inc_counter(&metrics::SUCCESS_COUNT); slog::debug!(log, "Request successful: {:?}", path); response } @@ -127,7 +141,11 @@ pub fn start_server( slog::debug!(log, "Request failure: {:?}", path); e.into() } - } + }; + + metrics::stop_timer(timer); + + response }) }; @@ -160,3 +178,14 @@ fn success_response(body: Body) -> Response { .body(body) .expect("We should always be able to make response from the success body.") } + +#[derive(Clone)] +pub struct DBPath(PathBuf); + +impl Deref for DBPath { + type Target = PathBuf; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/beacon_node/rest_api/src/metrics.rs b/beacon_node/rest_api/src/metrics.rs new file mode 100644 index 000000000..064359337 --- /dev/null +++ b/beacon_node/rest_api/src/metrics.rs @@ -0,0 +1,69 @@ +use crate::{success_response, ApiError, ApiResult, DBPath}; +use beacon_chain::{BeaconChain, BeaconChainTypes}; +use hyper::{Body, Request}; +use prometheus::{Encoder, TextEncoder}; +use std::sync::Arc; + +pub use lighthouse_metrics::*; + +lazy_static! { + pub static ref REQUEST_RESPONSE_TIME: Result = try_create_histogram( + "http_server_request_duration_seconds", + "Time taken to build a response to a HTTP request" + ); + pub static ref REQUEST_COUNT: Result = try_create_int_counter( + "http_server_request_total", + "Total count of HTTP requests received" + ); + pub static ref SUCCESS_COUNT: Result = try_create_int_counter( + "http_server_success_total", + "Total count of HTTP 200 responses sent" + ); +} + +/// Returns the full set of Prometheus metrics for the Beacon Node application. +/// +/// # Note +/// +/// This is a HTTP handler method. +pub fn get_prometheus(req: Request) -> ApiResult { + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + + let beacon_chain = req + .extensions() + .get::>>() + .ok_or_else(|| ApiError::ServerError("Beacon chain extension missing".to_string()))?; + let db_path = req + .extensions() + .get::() + .ok_or_else(|| ApiError::ServerError("DBPath extension missing".to_string()))?; + + // There are two categories of metrics: + // + // - Dynamically updated: things like histograms and event counters that are updated on the + // fly. + // - Statically updated: things which are only updated at the time of the scrape (used where we + // can avoid cluttering up code with metrics calls). + // + // The `lighthouse_metrics` crate has a `DEFAULT_REGISTRY` global singleton (via `lazy_static`) + // which keeps the state of all the metrics. Dynamically updated things will already be + // up-to-date in the registry (because they update themselves) however statically updated + // things need to be "scraped". + // + // We proceed by, first updating all the static metrics using `scrape_for_metrics(..)`. Then, + // using `lighthouse_metrics::gather(..)` to collect the global `DEFAULT_REGISTRY` metrics into + // a string that can be returned via HTTP. + + slot_clock::scrape_for_metrics::(&beacon_chain.slot_clock); + store::scrape_for_metrics(&db_path); + beacon_chain::scrape_for_metrics(&beacon_chain); + + encoder + .encode(&lighthouse_metrics::gather(), &mut buffer) + .unwrap(); + + String::from_utf8(buffer) + .map(|string| success_response(Body::from(string))) + .map_err(|e| ApiError::ServerError(format!("Failed to encode prometheus info: {:?}", e))) +} diff --git a/beacon_node/src/main.rs b/beacon_node/src/main.rs index 2e3ad0691..9a52f2638 100644 --- a/beacon_node/src/main.rs +++ b/beacon_node/src/main.rs @@ -128,28 +128,6 @@ fn main() { .help("Listen port for RPC endpoint.") .takes_value(true), ) - /* - * HTTP server parameters. - */ - .arg( - Arg::with_name("http") - .long("http") - .help("Enable the HTTP server.") - .takes_value(false), - ) - .arg( - Arg::with_name("http-address") - .long("http-address") - .value_name("Address") - .help("Listen address for the HTTP server.") - .takes_value(true), - ) - .arg( - Arg::with_name("http-port") - .long("http-port") - .help("Listen port for the HTTP server.") - .takes_value(true), - ) /* Client related arguments */ .arg( Arg::with_name("api") diff --git a/beacon_node/store/Cargo.toml b/beacon_node/store/Cargo.toml index 9607e8b8e..cd9711253 100644 --- a/beacon_node/store/Cargo.toml +++ b/beacon_node/store/Cargo.toml @@ -15,3 +15,5 @@ eth2_ssz = "0.1" eth2_ssz_derive = "0.1" tree_hash = "0.1" types = { path = "../../eth2/types" } +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" } diff --git a/beacon_node/store/src/impls.rs b/beacon_node/store/src/impls.rs index e88b70f39..ed724480c 100644 --- a/beacon_node/store/src/impls.rs +++ b/beacon_node/store/src/impls.rs @@ -9,10 +9,26 @@ impl StoreItem for BeaconBlock { } fn as_store_bytes(&self) -> Vec { - self.as_ssz_bytes() + let timer = metrics::start_timer(&metrics::BEACON_BLOCK_WRITE_TIMES); + let bytes = self.as_ssz_bytes(); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_BLOCK_WRITE_COUNT); + metrics::inc_counter_by(&metrics::BEACON_BLOCK_WRITE_BYTES, bytes.len() as i64); + + bytes } fn from_store_bytes(bytes: &mut [u8]) -> Result { - Self::from_ssz_bytes(bytes).map_err(Into::into) + let timer = metrics::start_timer(&metrics::BEACON_BLOCK_READ_TIMES); + + let len = bytes.len(); + let result = Self::from_ssz_bytes(bytes).map_err(Into::into); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_BLOCK_READ_COUNT); + metrics::inc_counter_by(&metrics::BEACON_BLOCK_READ_BYTES, len as i64); + + result } } diff --git a/beacon_node/store/src/impls/beacon_state.rs b/beacon_node/store/src/impls/beacon_state.rs index 591663fe0..69e83cd63 100644 --- a/beacon_node/store/src/impls/beacon_state.rs +++ b/beacon_node/store/src/impls/beacon_state.rs @@ -53,12 +53,29 @@ impl StoreItem for BeaconState { } fn as_store_bytes(&self) -> Vec { + let timer = metrics::start_timer(&metrics::BEACON_STATE_WRITE_TIMES); + let container = StorageContainer::new(self); - container.as_ssz_bytes() + let bytes = container.as_ssz_bytes(); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_STATE_WRITE_COUNT); + metrics::inc_counter_by(&metrics::BEACON_STATE_WRITE_BYTES, bytes.len() as i64); + + bytes } fn from_store_bytes(bytes: &mut [u8]) -> Result { + let timer = metrics::start_timer(&metrics::BEACON_STATE_READ_TIMES); + + let len = bytes.len(); let container = StorageContainer::from_ssz_bytes(bytes)?; - container.try_into() + let result = container.try_into(); + + metrics::stop_timer(timer); + metrics::inc_counter(&metrics::BEACON_STATE_READ_COUNT); + metrics::inc_counter_by(&metrics::BEACON_STATE_READ_BYTES, len as i64); + + result } } diff --git a/beacon_node/store/src/leveldb_store.rs b/beacon_node/store/src/leveldb_store.rs index 699861e3a..a085d845a 100644 --- a/beacon_node/store/src/leveldb_store.rs +++ b/beacon_node/store/src/leveldb_store.rs @@ -1,4 +1,5 @@ use super::*; +use crate::metrics; use db_key::Key; use leveldb::database::kv::KV; use leveldb::database::Database; @@ -62,15 +63,27 @@ impl Store for LevelDB { fn get_bytes(&self, col: &str, key: &[u8]) -> Result>, Error> { let column_key = Self::get_key_for_col(col, key); - self.db + metrics::inc_counter(&metrics::DISK_DB_READ_COUNT); + + let result = self + .db .get(self.read_options(), column_key) - .map_err(Into::into) + .map_err(Into::into); + + if let Ok(Some(bytes)) = &result { + metrics::inc_counter_by(&metrics::DISK_DB_READ_BYTES, bytes.len() as i64) + } + + result } /// Store some `value` in `column`, indexed with `key`. fn put_bytes(&self, col: &str, key: &[u8], val: &[u8]) -> Result<(), Error> { let column_key = Self::get_key_for_col(col, key); + metrics::inc_counter(&metrics::DISK_DB_WRITE_COUNT); + metrics::inc_counter_by(&metrics::DISK_DB_WRITE_BYTES, val.len() as i64); + self.db .put(self.write_options(), column_key, val) .map_err(Into::into) @@ -80,6 +93,8 @@ impl Store for LevelDB { fn key_exists(&self, col: &str, key: &[u8]) -> Result { let column_key = Self::get_key_for_col(col, key); + metrics::inc_counter(&metrics::DISK_DB_EXISTS_COUNT); + self.db .get(self.read_options(), column_key) .map_err(Into::into) @@ -89,6 +104,9 @@ impl Store for LevelDB { /// Removes `key` from `column`. fn key_delete(&self, col: &str, key: &[u8]) -> Result<(), Error> { let column_key = Self::get_key_for_col(col, key); + + metrics::inc_counter(&metrics::DISK_DB_DELETE_COUNT); + self.db .delete(self.write_options(), column_key) .map_err(Into::into) diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 5b8d58320..9c0e3cbae 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -7,18 +7,22 @@ //! //! Provides a simple API for storing/retrieving all types that sometimes needs type-hints. See //! tests for implementation examples. +#[macro_use] +extern crate lazy_static; mod block_at_slot; mod errors; mod impls; mod leveldb_store; mod memory_store; +mod metrics; pub mod iter; pub use self::leveldb_store::LevelDB as DiskStore; pub use self::memory_store::MemoryStore; pub use errors::Error; +pub use metrics::scrape_for_metrics; pub use types::*; /// An object capable of storing and retrieving objects implementing `StoreItem`. diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs new file mode 100644 index 000000000..90237824d --- /dev/null +++ b/beacon_node/store/src/metrics.rs @@ -0,0 +1,106 @@ +pub use lighthouse_metrics::{set_gauge, try_create_int_gauge, *}; + +use std::fs; +use std::path::PathBuf; + +lazy_static! { + /* + * General + */ + pub static ref DISK_DB_SIZE: Result = + try_create_int_gauge("store_disk_db_size", "Size of the on-disk database (bytes)"); + pub static ref DISK_DB_WRITE_BYTES: Result = try_create_int_counter( + "store_disk_db_write_bytes_total", + "Number of bytes attempted to be written to the on-disk DB" + ); + pub static ref DISK_DB_READ_BYTES: Result = try_create_int_counter( + "store_disk_db_read_bytes_total", + "Number of bytes read from the on-disk DB" + ); + pub static ref DISK_DB_READ_COUNT: Result = try_create_int_counter( + "store_disk_db_read_count_total", + "Total number of reads to the on-disk DB" + ); + pub static ref DISK_DB_WRITE_COUNT: Result = try_create_int_counter( + "store_disk_db_write_count_total", + "Total number of writes to the on-disk DB" + ); + pub static ref DISK_DB_EXISTS_COUNT: Result = try_create_int_counter( + "store_disk_db_exists_count_total", + "Total number of checks if a key is in the on-disk DB" + ); + pub static ref DISK_DB_DELETE_COUNT: Result = try_create_int_counter( + "store_disk_db_delete_count_total", + "Total number of deletions from the on-disk DB" + ); + /* + * Beacon State + */ + pub static ref BEACON_STATE_READ_TIMES: Result = try_create_histogram( + "store_beacon_state_read_overhead_seconds", + "Overhead on reading a beacon state from the DB (e.g., decoding)" + ); + pub static ref BEACON_STATE_READ_COUNT: Result = try_create_int_counter( + "store_beacon_state_read_total", + "Total number of beacon state reads from the DB" + ); + pub static ref BEACON_STATE_READ_BYTES: Result = try_create_int_counter( + "store_beacon_state_read_bytes_total", + "Total number of beacon state bytes read from the DB" + ); + pub static ref BEACON_STATE_WRITE_TIMES: Result = try_create_histogram( + "store_beacon_state_write_overhead_seconds", + "Overhead on writing a beacon state to the DB (e.g., encoding)" + ); + pub static ref BEACON_STATE_WRITE_COUNT: Result = try_create_int_counter( + "store_beacon_state_write_total", + "Total number of beacon state writes the DB" + ); + pub static ref BEACON_STATE_WRITE_BYTES: Result = try_create_int_counter( + "store_beacon_state_write_bytes_total", + "Total number of beacon state bytes written to the DB" + ); + /* + * Beacon Block + */ + pub static ref BEACON_BLOCK_READ_TIMES: Result = try_create_histogram( + "store_beacon_block_read_overhead_seconds", + "Overhead on reading a beacon block from the DB (e.g., decoding)" + ); + pub static ref BEACON_BLOCK_READ_COUNT: Result = try_create_int_counter( + "store_beacon_block_read_total", + "Total number of beacon block reads from the DB" + ); + pub static ref BEACON_BLOCK_READ_BYTES: Result = try_create_int_counter( + "store_beacon_block_read_bytes_total", + "Total number of beacon block bytes read from the DB" + ); + pub static ref BEACON_BLOCK_WRITE_TIMES: Result = try_create_histogram( + "store_beacon_block_write_overhead_seconds", + "Overhead on writing a beacon block to the DB (e.g., encoding)" + ); + pub static ref BEACON_BLOCK_WRITE_COUNT: Result = try_create_int_counter( + "store_beacon_block_write_total", + "Total number of beacon block writes the DB" + ); + pub static ref BEACON_BLOCK_WRITE_BYTES: Result = try_create_int_counter( + "store_beacon_block_write_bytes_total", + "Total number of beacon block bytes written to the DB" + ); +} + +/// Updates the global metrics registry with store-related information. +pub fn scrape_for_metrics(db_path: &PathBuf) { + let db_size = if let Ok(iter) = fs::read_dir(db_path) { + iter.filter_map(std::result::Result::ok) + .map(size_of_dir_entry) + .fold(0_u64, |sum, val| sum + val) + } else { + 0 + }; + set_gauge(&DISK_DB_SIZE, db_size as i64); +} + +fn size_of_dir_entry(dir: fs::DirEntry) -> u64 { + dir.metadata().map(|m| m.len()).unwrap_or(0) +} diff --git a/docs/config_examples/beacon-node.toml b/docs/config_examples/beacon-node.toml index 3c9f8b613..f0863934e 100644 --- a/docs/config_examples/beacon-node.toml +++ b/docs/config_examples/beacon-node.toml @@ -78,14 +78,6 @@ enabled = false listen_address = "127.0.0.1" port = 5051 -# -# Legacy HTTP server configuration. To be removed. -# -[http] -enabled = false -listen_address = "127.0.0.1" -listen_port = "5052" - # # RESTful HTTP API server configuration. # @@ -95,4 +87,4 @@ enabled = true # The listen port for the HTTP server. listen_address = "127.0.0.1" # The listen port for the HTTP server. -port = 1248 +port = 5052 diff --git a/eth2/utils/lighthouse_metrics/Cargo.toml b/eth2/utils/lighthouse_metrics/Cargo.toml new file mode 100644 index 000000000..0a24a96fb --- /dev/null +++ b/eth2/utils/lighthouse_metrics/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "lighthouse_metrics" +version = "0.1.0" +authors = ["Paul Hauner "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lazy_static = "1.3.0" +prometheus = "^0.6" diff --git a/eth2/utils/lighthouse_metrics/src/lib.rs b/eth2/utils/lighthouse_metrics/src/lib.rs new file mode 100644 index 000000000..c9e66e971 --- /dev/null +++ b/eth2/utils/lighthouse_metrics/src/lib.rs @@ -0,0 +1,72 @@ +use prometheus::{HistogramOpts, HistogramTimer, Opts}; + +pub use prometheus::{Histogram, IntCounter, IntGauge, Result}; + +pub fn gather() -> Vec { + prometheus::gather() +} + +pub fn try_create_int_counter(name: &str, help: &str) -> Result { + let opts = Opts::new(name, help); + let counter = IntCounter::with_opts(opts)?; + prometheus::register(Box::new(counter.clone()))?; + Ok(counter) +} + +pub fn try_create_int_gauge(name: &str, help: &str) -> Result { + let opts = Opts::new(name, help); + let gauge = IntGauge::with_opts(opts)?; + prometheus::register(Box::new(gauge.clone()))?; + Ok(gauge) +} + +pub fn try_create_histogram(name: &str, help: &str) -> Result { + let opts = HistogramOpts::new(name, help); + let histogram = Histogram::with_opts(opts)?; + prometheus::register(Box::new(histogram.clone()))?; + Ok(histogram) +} + +pub fn start_timer(histogram: &Result) -> Option { + if let Ok(histogram) = histogram { + Some(histogram.start_timer()) + } else { + None + } +} + +pub fn stop_timer(timer: Option) { + timer.map(|t| t.observe_duration()); +} + +pub fn inc_counter(counter: &Result) { + if let Ok(counter) = counter { + counter.inc(); + } +} + +pub fn inc_counter_by(counter: &Result, value: i64) { + if let Ok(counter) = counter { + counter.inc_by(value); + } +} + +pub fn set_gauge(gauge: &Result, value: i64) { + if let Ok(gauge) = gauge { + gauge.set(value); + } +} + +pub fn observe(histogram: &Result, value: f64) { + if let Ok(histogram) = histogram { + histogram.observe(value); + } +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} diff --git a/eth2/utils/slot_clock/Cargo.toml b/eth2/utils/slot_clock/Cargo.toml index 31a435725..c4b9df5ed 100644 --- a/eth2/utils/slot_clock/Cargo.toml +++ b/eth2/utils/slot_clock/Cargo.toml @@ -6,3 +6,5 @@ edition = "2018" [dependencies] types = { path = "../../types" } +lazy_static = "1.3.0" +lighthouse_metrics = { path = "../lighthouse_metrics" } diff --git a/eth2/utils/slot_clock/src/lib.rs b/eth2/utils/slot_clock/src/lib.rs index 7b86684fa..871743c9e 100644 --- a/eth2/utils/slot_clock/src/lib.rs +++ b/eth2/utils/slot_clock/src/lib.rs @@ -1,9 +1,15 @@ +#[macro_use] +extern crate lazy_static; + +mod metrics; mod system_time_slot_clock; mod testing_slot_clock; +use std::time::Duration; + pub use crate::system_time_slot_clock::{Error as SystemTimeSlotClockError, SystemTimeSlotClock}; pub use crate::testing_slot_clock::{Error as TestingSlotClockError, TestingSlotClock}; -use std::time::Duration; +pub use metrics::scrape_for_metrics; pub use types::Slot; pub trait SlotClock: Send + Sync + Sized { @@ -17,4 +23,6 @@ pub trait SlotClock: Send + Sync + Sized { fn present_slot(&self) -> Result, Self::Error>; fn duration_to_next_slot(&self) -> Result, Self::Error>; + + fn slot_duration_millis(&self) -> u64; } diff --git a/eth2/utils/slot_clock/src/metrics.rs b/eth2/utils/slot_clock/src/metrics.rs new file mode 100644 index 000000000..e0d3923e0 --- /dev/null +++ b/eth2/utils/slot_clock/src/metrics.rs @@ -0,0 +1,32 @@ +use crate::SlotClock; +pub use lighthouse_metrics::*; +use types::{EthSpec, Slot}; + +lazy_static! { + pub static ref PRESENT_SLOT: Result = + try_create_int_gauge("slotclock_present_slot", "The present wall-clock slot"); + pub static ref PRESENT_EPOCH: Result = + try_create_int_gauge("slotclock_present_epoch", "The present wall-clock epoch"); + pub static ref SLOTS_PER_EPOCH: Result = + try_create_int_gauge("slotclock_slots_per_epoch", "Slots per epoch (constant)"); + pub static ref MILLISECONDS_PER_SLOT: Result = try_create_int_gauge( + "slotclock_slot_time_milliseconds", + "The duration in milliseconds between each slot" + ); +} + +/// Update the global metrics `DEFAULT_REGISTRY` with info from the slot clock. +pub fn scrape_for_metrics(clock: &U) { + let present_slot = match clock.present_slot() { + Ok(Some(slot)) => slot, + _ => Slot::new(0), + }; + + set_gauge(&PRESENT_SLOT, present_slot.as_u64() as i64); + set_gauge( + &PRESENT_EPOCH, + present_slot.epoch(T::slots_per_epoch()).as_u64() as i64, + ); + set_gauge(&SLOTS_PER_EPOCH, T::slots_per_epoch() as i64); + set_gauge(&MILLISECONDS_PER_SLOT, clock.slot_duration_millis() as i64); +} diff --git a/eth2/utils/slot_clock/src/system_time_slot_clock.rs b/eth2/utils/slot_clock/src/system_time_slot_clock.rs index 7c184b02b..c493a8be8 100644 --- a/eth2/utils/slot_clock/src/system_time_slot_clock.rs +++ b/eth2/utils/slot_clock/src/system_time_slot_clock.rs @@ -52,6 +52,10 @@ impl SlotClock for SystemTimeSlotClock { fn duration_to_next_slot(&self) -> Result, Error> { duration_to_next_slot(self.genesis_seconds, self.slot_duration_seconds) } + + fn slot_duration_millis(&self) -> u64 { + self.slot_duration_seconds * 1000 + } } impl From for Error { diff --git a/eth2/utils/slot_clock/src/testing_slot_clock.rs b/eth2/utils/slot_clock/src/testing_slot_clock.rs index ab00d2baa..f741d3b87 100644 --- a/eth2/utils/slot_clock/src/testing_slot_clock.rs +++ b/eth2/utils/slot_clock/src/testing_slot_clock.rs @@ -40,6 +40,10 @@ impl SlotClock for TestingSlotClock { fn duration_to_next_slot(&self) -> Result, Error> { Ok(Some(Duration::from_secs(1))) } + + fn slot_duration_millis(&self) -> u64 { + 0 + } } #[cfg(test)]