Add extra prom beacon chain metrics

This commit is contained in:
Paul Hauner 2019-08-12 13:26:58 +10:00
parent 6150f0ae1a
commit 7140dbc45d
No known key found for this signature in database
GPG Key ID: 5E2CFF9B75FA63DF
10 changed files with 233 additions and 11 deletions

View File

@ -7,6 +7,7 @@ edition = "2018"
[dependencies]
store = { path = "../store" }
parking_lot = "0.7"
lazy_static = "1.3.0"
lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" }
log = "0.4"
operation_pool = { path = "../../eth2/operation_pool" }
@ -17,7 +18,6 @@ sloggers = { version = "^0.3" }
slot_clock = { path = "../../eth2/utils/slot_clock" }
eth2_ssz = "0.1"
eth2_ssz_derive = "0.1"
lazy_static = "1.3.0"
state_processing = { path = "../../eth2/state_processing" }
tree_hash = "0.1"
types = { path = "../../eth2/types" }

View File

@ -16,6 +16,7 @@ pub use self::beacon_chain::{
pub use self::checkpoint::CheckPoint;
pub use self::errors::{BeaconChainError, BlockProductionError};
pub use lmd_ghost;
pub use metrics::scrape_for_metrics;
pub use parking_lot;
pub use slot_clock;
pub use state_processing::per_block_processing::errors::{

View File

@ -1,4 +1,6 @@
use crate::{BeaconChain, BeaconChainTypes};
pub use lighthouse_metrics::*;
use types::{BeaconState, Epoch, EthSpec, Hash256, Slot};
lazy_static! {
/*
@ -133,15 +135,157 @@ lazy_static! {
"Time taken to add an attestation to fork choice"
);
/*
* Head Updating
*/
pub static ref UPDATE_HEAD_TIMES: Result<Histogram> =
try_create_histogram("update_head_times", "Time taken to update the canonical head");
/*
* Persisting BeaconChain to disk
*/
pub static ref PERSIST_CHAIN: Result<Histogram> =
try_create_histogram("persist_chain", "Time taken to update the canonical head");
}
// Lazy-static is split so we don't reach the crate-level recursion limit.
lazy_static! {
    /*
     * Slot Clock
     */
    pub static ref PRESENT_SLOT: Result<IntGauge> =
        try_create_int_gauge("present_slot", "The present slot, according to system time");
    pub static ref PRESENT_EPOCH: Result<IntGauge> =
        try_create_int_gauge("present_epoch", "The present epoch, according to system time");

    /*
     * Chain Head
     */
    pub static ref UPDATE_HEAD_TIMES: Result<Histogram> =
        try_create_histogram("update_head_times", "Time taken to update the canonical head");
    pub static ref HEAD_STATE_SLOT: Result<IntGauge> =
        try_create_int_gauge("head_state_slot", "Slot of the block at the head of the chain");
    // Fed the beacon *state* root (not the block root) by `scrape_head_state`; only the low
    // 64 bits fit in the gauge, so the value is useful for change-detection only.
    pub static ref HEAD_STATE_ROOT: Result<IntGauge> =
        try_create_int_gauge("head_state_root", "Root of the state at the head of the chain");
    pub static ref HEAD_STATE_LATEST_BLOCK_SLOT: Result<IntGauge> =
        try_create_int_gauge("head_state_latest_block_slot", "Latest block slot at the head of the chain");
    pub static ref HEAD_STATE_CURRENT_JUSTIFIED_ROOT: Result<IntGauge> =
        try_create_int_gauge("head_state_current_justified_root", "Current justified root at the head of the chain");
    pub static ref HEAD_STATE_CURRENT_JUSTIFIED_EPOCH: Result<IntGauge> =
        try_create_int_gauge("head_state_current_justified_epoch", "Current justified epoch at the head of the chain");
    pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT: Result<IntGauge> =
        try_create_int_gauge("head_state_previous_justified_root", "Previous justified root at the head of the chain");
    pub static ref HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH: Result<IntGauge> =
        try_create_int_gauge("head_state_previous_justified_epoch", "Previous justified epoch at the head of the chain");
    pub static ref HEAD_STATE_FINALIZED_ROOT: Result<IntGauge> =
        try_create_int_gauge("head_state_finalized_root", "Finalized root at the head of the chain");
    pub static ref HEAD_STATE_FINALIZED_EPOCH: Result<IntGauge> =
        try_create_int_gauge("head_state_finalized_epoch", "Finalized epoch at the head of the chain");
    pub static ref HEAD_STATE_TOTAL_VALIDATORS: Result<IntGauge> =
        try_create_int_gauge("head_state_total_validators", "Count of validators at the head of the chain");
    pub static ref HEAD_STATE_ACTIVE_VALIDATORS: Result<IntGauge> =
        try_create_int_gauge("head_state_active_validators", "Count of active validators at the head of the chain");
    pub static ref HEAD_STATE_VALIDATOR_BALANCES: Result<IntGauge> =
        try_create_int_gauge("head_state_validator_balances", "Sum of all validator balances at the head of the chain");
    pub static ref HEAD_STATE_SLASHED_VALIDATORS: Result<IntGauge> =
        try_create_int_gauge("head_state_slashed_validators", "Count of all slashed validators at the head of the chain");
    // Help text fixed: this gauge is fed a count of withdrawable validators (see
    // `scrape_head_state`), not a balance sum — the old text was a copy-paste of the
    // balances metric's description.
    pub static ref HEAD_STATE_WITHDRAWN_VALIDATORS: Result<IntGauge> =
        try_create_int_gauge("head_state_withdrawn_validators", "Count of withdrawable validators at the head of the chain");
    pub static ref HEAD_STATE_ETH1_DEPOSIT_INDEX: Result<IntGauge> =
        try_create_int_gauge("head_state_eth1_deposit_index", "Eth1 deposit index at the head of the chain");
}
/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,
/// head state info, etc) and update the Prometheus `DEFAULT_REGISTRY`.
pub fn scrape_for_metrics<T: BeaconChainTypes>(beacon_chain: &BeaconChain<T>) {
    // Read the slot clock once so the slot and epoch gauges are derived from the same
    // reading — the previous code read it twice, which could race across a slot boundary
    // and report an inconsistent slot/epoch pair. Defaults to slot 0 (and therefore
    // epoch 0) when the clock is unavailable (e.g., pre-genesis).
    let present_slot = beacon_chain
        .read_slot_clock()
        .unwrap_or_else(|| Slot::new(0));
    set_gauge_by_slot(&PRESENT_SLOT, present_slot);
    set_gauge_by_epoch(
        &PRESENT_EPOCH,
        present_slot.epoch(T::EthSpec::slots_per_epoch()),
    );

    // Fetch the head once rather than calling `head()` per field.
    let head = beacon_chain.head();
    scrape_head_state::<T>(&head.beacon_state, head.beacon_state_root);
}
/// Scrape the given `state` assuming it's the head state, updating the `DEFAULT_REGISTRY`.
///
/// Updates the slot/root/checkpoint gauges and the per-validator counts. Only the low 64
/// bits of each root fit in an `IntGauge` (see `set_gauge_by_hash`).
fn scrape_head_state<T: BeaconChainTypes>(state: &BeaconState<T::EthSpec>, state_root: Hash256) {
    set_gauge_by_slot(&HEAD_STATE_SLOT, state.slot);
    set_gauge_by_hash(&HEAD_STATE_ROOT, state_root);
    set_gauge_by_slot(
        &HEAD_STATE_LATEST_BLOCK_SLOT,
        state.latest_block_header.slot,
    );
    set_gauge_by_hash(
        &HEAD_STATE_CURRENT_JUSTIFIED_ROOT,
        state.current_justified_checkpoint.root,
    );
    set_gauge_by_epoch(
        &HEAD_STATE_CURRENT_JUSTIFIED_EPOCH,
        state.current_justified_checkpoint.epoch,
    );
    set_gauge_by_hash(
        &HEAD_STATE_PREVIOUS_JUSTIFIED_ROOT,
        state.previous_justified_checkpoint.root,
    );
    set_gauge_by_epoch(
        &HEAD_STATE_PREVIOUS_JUSTIFIED_EPOCH,
        state.previous_justified_checkpoint.epoch,
    );
    set_gauge_by_hash(&HEAD_STATE_FINALIZED_ROOT, state.finalized_checkpoint.root);
    set_gauge_by_epoch(
        &HEAD_STATE_FINALIZED_EPOCH,
        state.finalized_checkpoint.epoch,
    );

    set_gauge_by_usize(&HEAD_STATE_TOTAL_VALIDATORS, state.validators.len());
    set_gauge_by_u64(
        &HEAD_STATE_VALIDATOR_BALANCES,
        // `sum()` replaces the manual `fold(0_u64, |acc, i| acc + i)`.
        state.balances.iter().sum(),
    );

    // Walk the validator registry once instead of three times, accumulating the
    // active/slashed/withdrawable counts in a single pass.
    let current_epoch = state.current_epoch();
    let (mut active, mut slashed, mut withdrawable) = (0_usize, 0_usize, 0_usize);
    for v in state.validators.iter() {
        if v.is_active_at(current_epoch) {
            active += 1;
        }
        if v.slashed {
            slashed += 1;
        }
        if v.is_withdrawable_at(current_epoch) {
            withdrawable += 1;
        }
    }
    set_gauge_by_usize(&HEAD_STATE_ACTIVE_VALIDATORS, active);
    set_gauge_by_usize(&HEAD_STATE_SLASHED_VALIDATORS, slashed);
    set_gauge_by_usize(&HEAD_STATE_WITHDRAWN_VALIDATORS, withdrawable);

    set_gauge_by_u64(&HEAD_STATE_ETH1_DEPOSIT_INDEX, state.eth1_deposit_index);
}
/// Records a `Slot` on `gauge` as its `u64` value (no-op if the gauge failed to register).
fn set_gauge_by_slot(gauge: &Result<IntGauge>, value: Slot) {
    let raw = value.as_u64();
    set_gauge(gauge, raw as i64);
}
/// Records an `Epoch` on `gauge` as its `u64` value (no-op if the gauge failed to register).
fn set_gauge_by_epoch(gauge: &Result<IntGauge>, value: Epoch) {
    let raw = value.as_u64();
    set_gauge(gauge, raw as i64);
}
/// Records a `Hash256` on `gauge` via its low 64 little-endian bits.
///
/// The full 256-bit root cannot fit in an `i64`, so the stored value is only meaningful
/// for detecting *changes* to the root, not for reconstructing it.
fn set_gauge_by_hash(gauge: &Result<IntGauge>, value: Hash256) {
    let low_bits = value.to_low_u64_le();
    set_gauge(gauge, low_bits as i64);
}
/// Records a `usize` on `gauge` (no-op if the gauge failed to register).
fn set_gauge_by_usize(gauge: &Result<IntGauge>, value: usize) {
    let as_int = value as i64;
    set_gauge(gauge, as_int);
}
/// Records a `u64` on `gauge` (no-op if the gauge failed to register).
fn set_gauge_by_u64(gauge: &Result<IntGauge>, value: u64) {
    let as_int = value as i64;
    set_gauge(gauge, as_int);
}

View File

@ -142,6 +142,7 @@ where
&client_config.rest_api,
executor,
beacon_chain.clone(),
client_config.db_path().expect("unable to read datadir"),
&log,
) {
Ok(s) => Some(s),

View File

@ -13,6 +13,8 @@ use hyper::rt::Future;
use hyper::service::service_fn_ok;
use hyper::{Body, Method, Response, Server, StatusCode};
use slog::{info, o, warn};
use std::ops::Deref;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::runtime::TaskExecutor;
use url_query::UrlQuery;
@ -68,6 +70,7 @@ pub fn start_server<T: BeaconChainTypes + Clone + 'static>(
config: &ApiConfig,
executor: &TaskExecutor,
beacon_chain: Arc<BeaconChain<T>>,
db_path: PathBuf,
log: &slog::Logger,
) -> Result<exit_future::Signal, hyper::Error> {
let log = log.new(o!("Service" => "Api"));
@ -81,6 +84,8 @@ pub fn start_server<T: BeaconChainTypes + Clone + 'static>(
Ok(())
});
let db_path = DBPath(db_path);
// Get the address to bind to
let bind_addr = (config.listen_address, config.port).into();
@ -91,12 +96,14 @@ pub fn start_server<T: BeaconChainTypes + Clone + 'static>(
let service = move || {
let log = server_log.clone();
let beacon_chain = server_bc.clone();
let db_path = db_path.clone();
// Create a simple handler for the router, inject our stateful objects into the request.
service_fn_ok(move |mut req| {
req.extensions_mut().insert::<slog::Logger>(log.clone());
req.extensions_mut()
.insert::<Arc<BeaconChain<T>>>(beacon_chain.clone());
req.extensions_mut().insert::<DBPath>(db_path.clone());
let path = req.uri().path().to_string();
@ -104,7 +111,7 @@ pub fn start_server<T: BeaconChainTypes + Clone + 'static>(
let result = match (req.method(), path.as_ref()) {
(&Method::GET, "/beacon/state") => beacon::get_state::<T>(req),
(&Method::GET, "/beacon/state_root") => beacon::get_state_root::<T>(req),
(&Method::GET, "/metrics") => metrics::get_prometheus(req),
(&Method::GET, "/metrics") => metrics::get_prometheus::<T>(req),
(&Method::GET, "/node/version") => node::get_version(req),
(&Method::GET, "/node/genesis_time") => node::get_genesis_time::<T>(req),
_ => Err(ApiError::MethodNotAllowed(path.clone())),
@ -154,3 +161,14 @@ fn success_response(body: Body) -> Response<Body> {
.body(body)
.expect("We should always be able to make response from the success body.")
}
/// Newtype around the path of the on-disk database directory.
///
/// Exists so the path can be stored in (and later fetched from) a hyper request's typed
/// `extensions` map without clashing with any other `PathBuf` stored there.
#[derive(Clone)]
pub struct DBPath(PathBuf);
/// Lets a `&DBPath` be used wherever a `&PathBuf` (or, via a second deref step, `&Path`)
/// is expected, e.g. when passed to `store::scrape_for_metrics`.
impl Deref for DBPath {
type Target = PathBuf;
fn deref(&self) -> &Self::Target {
&self.0
}
}

View File

@ -1,12 +1,26 @@
use crate::{success_response, ApiError, ApiResult};
use crate::{success_response, ApiError, ApiResult, DBPath};
use beacon_chain::{BeaconChain, BeaconChainTypes};
use hyper::{Body, Request};
use prometheus::{Encoder, TextEncoder};
use std::sync::Arc;
/// Returns the full set of Prometheus metrics for the Beacon Node application.
pub fn get_prometheus(_req: Request<Body>) -> ApiResult {
pub fn get_prometheus<T: BeaconChainTypes + 'static>(req: Request<Body>) -> ApiResult {
let mut buffer = vec![];
let encoder = TextEncoder::new();
let beacon_chain = req
.extensions()
.get::<Arc<BeaconChain<T>>>()
.ok_or_else(|| ApiError::ServerError("Beacon chain extension missing".to_string()))?;
let db_path = req
.extensions()
.get::<DBPath>()
.ok_or_else(|| ApiError::ServerError("DBPath extension missing".to_string()))?;
store::scrape_for_metrics(&db_path);
beacon_chain::scrape_for_metrics(&beacon_chain);
encoder.encode(&prometheus::gather(), &mut buffer).unwrap();
String::from_utf8(buffer)

View File

@ -15,3 +15,5 @@ eth2_ssz = "0.1"
eth2_ssz_derive = "0.1"
tree_hash = "0.1"
types = { path = "../../eth2/types" }
lazy_static = "1.3.0"
lighthouse_metrics = { path = "../../eth2/utils/lighthouse_metrics" }

View File

@ -7,18 +7,22 @@
//!
//! Provides a simple API for storing/retrieving all types that sometimes needs type-hints. See
//! tests for implementation examples.
#[macro_use]
extern crate lazy_static;
mod block_at_slot;
mod errors;
mod impls;
mod leveldb_store;
mod memory_store;
mod metrics;
pub mod iter;
pub use self::leveldb_store::LevelDB as DiskStore;
pub use self::memory_store::MemoryStore;
pub use errors::Error;
pub use metrics::scrape_for_metrics;
pub use types::*;
/// An object capable of storing and retrieving objects implementing `StoreItem`.

View File

@ -0,0 +1,25 @@
pub use lighthouse_metrics::{set_gauge, try_create_int_gauge, *};
use std::fs;
use std::path::PathBuf;
lazy_static! {
// Gauge holding the byte-size of the on-disk database directory, refreshed by
// `scrape_for_metrics` below; `Err` if the metric could not be registered with the
// default Prometheus registry.
pub static ref DISK_DB_SIZE: Result<IntGauge> =
try_create_int_gauge("database_size", "Size of the on-disk database (bytes)");
}
/// Updates the global metrics registry with store-related information.
///
/// Records the combined size (in bytes) of the entries directly inside `db_path`; if the
/// directory cannot be read, the size is reported as 0.
pub fn scrape_for_metrics(db_path: &PathBuf) {
    let db_size: u64 = fs::read_dir(db_path)
        .map(|entries| {
            entries
                .filter_map(|entry| entry.ok())
                .map(size_of_dir_entry)
                .sum()
        })
        .unwrap_or(0);
    set_gauge(&DISK_DB_SIZE, db_size as i64);
}
/// Returns the size in bytes of the entry's file, or 0 if its metadata cannot be read.
fn size_of_dir_entry(dir: fs::DirEntry) -> u64 {
    match dir.metadata() {
        Ok(meta) => meta.len(),
        Err(_) => 0,
    }
}

View File

@ -1,6 +1,6 @@
use prometheus::{HistogramOpts, HistogramTimer, Opts};
pub use prometheus::{Histogram, IntCounter, Result};
pub use prometheus::{Histogram, IntCounter, IntGauge, Result};
pub fn try_create_int_counter(name: &str, help: &str) -> Result<IntCounter> {
let opts = Opts::new(name, help);
@ -9,6 +9,13 @@ pub fn try_create_int_counter(name: &str, help: &str) -> Result<IntCounter> {
Ok(counter)
}
/// Builds an `IntGauge` from `name`/`help` and registers it with the default Prometheus
/// registry, returning the registered gauge (or the creation/registration error).
pub fn try_create_int_gauge(name: &str, help: &str) -> Result<IntGauge> {
    let gauge = IntGauge::with_opts(Opts::new(name, help))?;
    prometheus::register(Box::new(gauge.clone()))?;
    Ok(gauge)
}
pub fn try_create_histogram(name: &str, help: &str) -> Result<Histogram> {
let opts = HistogramOpts::new(name, help);
let histogram = Histogram::with_opts(opts)?;
@ -34,6 +41,12 @@ pub fn inc_counter(counter: &Result<IntCounter>) {
}
}
/// Writes `value` to `gauge`; does nothing when the gauge failed to register.
pub fn set_gauge(gauge: &Result<IntGauge>, value: i64) {
    match gauge {
        Ok(g) => g.set(value),
        Err(_) => {}
    }
}
pub fn observe(histogram: &Result<Histogram>, value: f64) {
if let Ok(histogram) = histogram {
histogram.observe(value);