Fix broken beacon chain metrics, add slot clock metrics
This commit is contained in:
parent
95a320817e
commit
d7c546844c
@ -1,3 +1,4 @@
|
|||||||
|
#![recursion_limit = "128"] // For lazy-static
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate lazy_static;
|
extern crate lazy_static;
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use crate::{BeaconChain, BeaconChainTypes};
|
use crate::{BeaconChain, BeaconChainTypes};
|
||||||
pub use lighthouse_metrics::*;
|
pub use lighthouse_metrics::*;
|
||||||
use types::{BeaconState, Epoch, EthSpec, Hash256, Slot};
|
use types::{BeaconState, Epoch, Hash256, Slot};
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
/*
|
/*
|
||||||
@ -140,17 +140,6 @@ lazy_static! {
|
|||||||
*/
|
*/
|
||||||
pub static ref PERSIST_CHAIN: Result<Histogram> =
|
pub static ref PERSIST_CHAIN: Result<Histogram> =
|
||||||
try_create_histogram("beacon_persist_chain", "Time taken to update the canonical head");
|
try_create_histogram("beacon_persist_chain", "Time taken to update the canonical head");
|
||||||
}
|
|
||||||
|
|
||||||
// Lazy-static is split so we don't reach the crate-level recursion limit.
|
|
||||||
lazy_static! {
|
|
||||||
/*
|
|
||||||
* Slot Clock
|
|
||||||
*/
|
|
||||||
pub static ref PRESENT_SLOT: Result<IntGauge> =
|
|
||||||
try_create_int_gauge("beacon_present_slot", "The present slot, according to system time");
|
|
||||||
pub static ref PRESENT_EPOCH: Result<IntGauge> =
|
|
||||||
try_create_int_gauge("beacon_present_epoch", "The present epoch, according to system time");
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Chain Head
|
* Chain Head
|
||||||
@ -194,21 +183,6 @@ lazy_static! {
|
|||||||
/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,
|
/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,
|
||||||
/// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`.
|
/// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`.
|
||||||
pub fn scrape_for_metrics<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>) {
|
pub fn scrape_for_metrics<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>) {
|
||||||
set_gauge_by_slot(
|
|
||||||
&PRESENT_SLOT,
|
|
||||||
beacon_chain
|
|
||||||
.read_slot_clock()
|
|
||||||
.unwrap_or_else(|| Slot::new(0)),
|
|
||||||
);
|
|
||||||
|
|
||||||
set_gauge_by_epoch(
|
|
||||||
&PRESENT_EPOCH,
|
|
||||||
beacon_chain
|
|
||||||
.read_slot_clock()
|
|
||||||
.map(|s| s.epoch(T::EthSpec::slots_per_epoch()))
|
|
||||||
.unwrap_or_else(|| Epoch::new(0)),
|
|
||||||
);
|
|
||||||
|
|
||||||
scrape_head_state::<T>(
|
scrape_head_state::<T>(
|
||||||
&beacon_chain.head().beacon_state,
|
&beacon_chain.head().beacon_state,
|
||||||
beacon_chain.head().beacon_state_root,
|
beacon_chain.head().beacon_state_root,
|
||||||
|
@ -26,3 +26,4 @@ tokio = "0.1.17"
|
|||||||
url = "2.0"
|
url = "2.0"
|
||||||
lazy_static = "1.3.0"
|
lazy_static = "1.3.0"
|
||||||
lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" }
|
lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" }
|
||||||
|
slot_clock = { path = "../../eth2/utils/slot_clock" }
|
||||||
|
@ -39,6 +39,23 @@ pub fn get_prometheus<T: BeaconChainTypes + 'static>(req: Request<Body>) -> ApiR
|
|||||||
.get::<DBPath>()
|
.get::<DBPath>()
|
||||||
.ok_or_else(|| ApiError::ServerError("DBPath extension missing".to_string()))?;
|
.ok_or_else(|| ApiError::ServerError("DBPath extension missing".to_string()))?;
|
||||||
|
|
||||||
|
// There are two categories of metrics:
|
||||||
|
//
|
||||||
|
// - Dynamically updated: things like histograms and event counters that are updated on the
|
||||||
|
// fly.
|
||||||
|
// - Statically updated: things which are only updated at the time of the scrape (used where we
|
||||||
|
// can avoid cluttering up code with metrics calls).
|
||||||
|
//
|
||||||
|
// The `prometheus` crate has a `DEFAULT_REGISTRY` global singleton (via `lazy_static`) which
|
||||||
|
// keeps the state of all the metrics. Dynamically updated things will already be up-to-date in
|
||||||
|
// the registry (because they update themselves); however, statically updated things need to be
|
||||||
|
// "scraped".
|
||||||
|
//
|
||||||
|
// We proceed by first updating all the static metrics using `scrape_for_metrics(..)`. Then,
|
||||||
|
// using `prometheus::gather(..)` to collect the global `DEFAULT_REGISTRY` metrics into a
|
||||||
|
// string that can be returned via HTTP.
|
||||||
|
|
||||||
|
slot_clock::scrape_for_metrics::<T::EthSpec, T::SlotClock>(&beacon_chain.slot_clock);
|
||||||
store::scrape_for_metrics(&db_path);
|
store::scrape_for_metrics(&db_path);
|
||||||
beacon_chain::scrape_for_metrics(&beacon_chain);
|
beacon_chain::scrape_for_metrics(&beacon_chain);
|
||||||
|
|
||||||
|
@ -6,3 +6,5 @@ edition = "2018"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
types = { path = "../../types" }
|
types = { path = "../../types" }
|
||||||
|
lazy_static = "1.3.0"
|
||||||
|
lighthouse_metrics = { path = "../lighthouse_metrics" }
|
||||||
|
@ -1,9 +1,15 @@
|
|||||||
|
#[macro_use]
|
||||||
|
extern crate lazy_static;
|
||||||
|
|
||||||
|
mod metrics;
|
||||||
mod system_time_slot_clock;
|
mod system_time_slot_clock;
|
||||||
mod testing_slot_clock;
|
mod testing_slot_clock;
|
||||||
|
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
pub use crate::system_time_slot_clock::{Error as SystemTimeSlotClockError, SystemTimeSlotClock};
|
pub use crate::system_time_slot_clock::{Error as SystemTimeSlotClockError, SystemTimeSlotClock};
|
||||||
pub use crate::testing_slot_clock::{Error as TestingSlotClockError, TestingSlotClock};
|
pub use crate::testing_slot_clock::{Error as TestingSlotClockError, TestingSlotClock};
|
||||||
use std::time::Duration;
|
pub use metrics::scrape_for_metrics;
|
||||||
pub use types::Slot;
|
pub use types::Slot;
|
||||||
|
|
||||||
pub trait SlotClock: Send + Sync + Sized {
|
pub trait SlotClock: Send + Sync + Sized {
|
||||||
@ -17,4 +23,6 @@ pub trait SlotClock: Send + Sync + Sized {
|
|||||||
fn present_slot(&self) -> Result<Option<Slot>, Self::Error>;
|
fn present_slot(&self) -> Result<Option<Slot>, Self::Error>;
|
||||||
|
|
||||||
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Self::Error>;
|
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Self::Error>;
|
||||||
|
|
||||||
|
fn slot_duration_millis(&self) -> u64;
|
||||||
}
|
}
|
||||||
|
29
eth2/utils/slot_clock/src/metrics.rs
Normal file
29
eth2/utils/slot_clock/src/metrics.rs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
use crate::SlotClock;
|
||||||
|
pub use lighthouse_metrics::*;
|
||||||
|
use types::{EthSpec, Slot};
|
||||||
|
|
||||||
|
lazy_static! {
    /// Gauge for the present slot as reported by the slot clock.
    pub static ref PRESENT_SLOT: Result<IntGauge> = try_create_int_gauge(
        "slotclock_present_slot",
        "The present wall-clock slot"
    );

    /// Gauge for the present epoch as reported by the slot clock.
    pub static ref PRESENT_EPOCH: Result<IntGauge> = try_create_int_gauge(
        "slotclock_present_epoch",
        "The present wall-clock epoch"
    );

    /// Gauge for the length of a slot, in milliseconds.
    pub static ref MILLISECONDS_PER_SLOT: Result<IntGauge> = try_create_int_gauge(
        "slotclock_slot_time_milliseconds",
        "The duration in milliseconds between each slot"
    );
}
|
||||||
|
|
||||||
|
/// Update the global metrics `DEFAULT_REGISTRY` with info from the slot clock.
|
||||||
|
pub fn scrape_for_metrics<T: EthSpec, U: SlotClock>(clock: &U) {
|
||||||
|
let present_slot = match clock.present_slot() {
|
||||||
|
Ok(Some(slot)) => slot,
|
||||||
|
_ => Slot::new(0),
|
||||||
|
};
|
||||||
|
|
||||||
|
set_gauge(&PRESENT_SLOT, present_slot.as_u64() as i64);
|
||||||
|
set_gauge(
|
||||||
|
&PRESENT_EPOCH,
|
||||||
|
present_slot.epoch(T::slots_per_epoch()).as_u64() as i64,
|
||||||
|
);
|
||||||
|
set_gauge(&MILLISECONDS_PER_SLOT, clock.slot_duration_millis() as i64);
|
||||||
|
}
|
@ -52,6 +52,10 @@ impl SlotClock for SystemTimeSlotClock {
|
|||||||
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Error> {
|
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Error> {
|
||||||
duration_to_next_slot(self.genesis_seconds, self.slot_duration_seconds)
|
duration_to_next_slot(self.genesis_seconds, self.slot_duration_seconds)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn slot_duration_millis(&self) -> u64 {
|
||||||
|
self.slot_duration_seconds * 1000
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<SystemTimeError> for Error {
|
impl From<SystemTimeError> for Error {
|
||||||
|
@ -40,6 +40,10 @@ impl SlotClock for TestingSlotClock {
|
|||||||
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Error> {
|
fn duration_to_next_slot(&self) -> Result<Option<Duration>, Error> {
|
||||||
Ok(Some(Duration::from_secs(1)))
|
Ok(Some(Duration::from_secs(1)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn slot_duration_millis(&self) -> u64 {
|
||||||
|
0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
Loading…
Reference in New Issue
Block a user