Health Endpoints for UI (#3668)
This PR adds health endpoints for the beacon node and the validator client. Specifically, it adds the endpoint `/lighthouse/ui/health` to both. These endpoints are not entirely stable yet, but they provide a base for modification for our UI. They may also have issues on various platforms and may need further modification.
This commit is contained in:
parent
9bd6d9ce7a
commit
230168deff
948
Cargo.lock
generated
948
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -37,6 +37,7 @@ members = [
|
||||
"common/oneshot_broadcast",
|
||||
"common/sensitive_url",
|
||||
"common/slot_clock",
|
||||
"common/system_health",
|
||||
"common/task_executor",
|
||||
"common/target_check",
|
||||
"common/test_random_derive",
|
||||
|
@ -42,7 +42,7 @@ pub enum ClientGenesis {
|
||||
/// The core configuration of a Lighthouse beacon node.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Config {
|
||||
pub data_dir: PathBuf,
|
||||
data_dir: PathBuf,
|
||||
/// Name of the directory inside the data directory where the main "hot" DB is located.
|
||||
pub db_name: String,
|
||||
/// Path where the freezer database will be located.
|
||||
@ -103,6 +103,17 @@ impl Default for Config {
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Updates the data directory for the Client.
|
||||
pub fn set_data_dir(&mut self, data_dir: PathBuf) {
|
||||
self.data_dir = data_dir.clone();
|
||||
self.http_api.data_dir = data_dir;
|
||||
}
|
||||
|
||||
/// Gets the config's data_dir.
|
||||
pub fn data_dir(&self) -> &PathBuf {
|
||||
&self.data_dir
|
||||
}
|
||||
|
||||
/// Get the database path without initialising it.
|
||||
pub fn get_db_path(&self) -> PathBuf {
|
||||
self.get_data_dir().join(&self.db_name)
|
||||
|
@ -33,6 +33,9 @@ safe_arith = {path = "../../consensus/safe_arith"}
|
||||
task_executor = { path = "../../common/task_executor" }
|
||||
lru = "0.7.7"
|
||||
tree_hash = "0.4.1"
|
||||
sysinfo = "0.26.5"
|
||||
system_health = { path = "../../common/system_health" }
|
||||
directory = { path = "../../common/directory" }
|
||||
|
||||
[dev-dependencies]
|
||||
store = { path = "../store" }
|
||||
|
@ -26,12 +26,14 @@ use beacon_chain::{
|
||||
BeaconChainTypes, ProduceBlockVerification, WhenSlotSkipped,
|
||||
};
|
||||
pub use block_id::BlockId;
|
||||
use directory::DEFAULT_ROOT_DIR;
|
||||
use eth2::types::{
|
||||
self as api_types, EndpointVersion, SkipRandaoVerification, ValidatorId, ValidatorStatus,
|
||||
};
|
||||
use lighthouse_network::{types::SyncState, EnrExt, NetworkGlobals, PeerId, PubsubMessage};
|
||||
use lighthouse_version::version_with_platform;
|
||||
use network::{NetworkMessage, NetworkSenders, ValidatorSubscriptionMessage};
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slog::{crit, debug, error, info, warn, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
@ -43,6 +45,8 @@ use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use sysinfo::{System, SystemExt};
|
||||
use system_health::observe_system_health_bn;
|
||||
use tokio::sync::mpsc::{Sender, UnboundedSender};
|
||||
use tokio_stream::{wrappers::BroadcastStream, StreamExt};
|
||||
use types::{
|
||||
@ -110,6 +114,7 @@ pub struct Config {
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
pub allow_sync_stalled: bool,
|
||||
pub spec_fork_name: Option<ForkName>,
|
||||
pub data_dir: PathBuf,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@ -122,6 +127,7 @@ impl Default for Config {
|
||||
tls_config: None,
|
||||
allow_sync_stalled: false,
|
||||
spec_fork_name: None,
|
||||
data_dir: PathBuf::from(DEFAULT_ROOT_DIR),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -323,6 +329,10 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
}
|
||||
});
|
||||
|
||||
// Create a `warp` filter for the data_dir.
|
||||
let inner_data_dir = ctx.config.data_dir.clone();
|
||||
let data_dir_filter = warp::any().map(move || inner_data_dir.clone());
|
||||
|
||||
// Create a `warp` filter that provides access to the beacon chain.
|
||||
let inner_ctx = ctx.clone();
|
||||
let chain_filter =
|
||||
@ -431,6 +441,37 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
let inner_ctx = ctx.clone();
|
||||
let log_filter = warp::any().map(move || inner_ctx.log.clone());
|
||||
|
||||
// Create a `warp` filter that provides access to local system information.
|
||||
let system_info = Arc::new(RwLock::new(sysinfo::System::new()));
|
||||
{
|
||||
// grab write access for initialisation
|
||||
let mut system_info = system_info.write();
|
||||
system_info.refresh_disks_list();
|
||||
system_info.refresh_networks_list();
|
||||
system_info.refresh_cpu_specifics(sysinfo::CpuRefreshKind::everything());
|
||||
system_info.refresh_cpu();
|
||||
} // end lock
|
||||
|
||||
let system_info_filter =
|
||||
warp::any()
|
||||
.map(move || system_info.clone())
|
||||
.map(|sysinfo: Arc<RwLock<System>>| {
|
||||
{
|
||||
// refresh stats
|
||||
let mut sysinfo_lock = sysinfo.write();
|
||||
sysinfo_lock.refresh_memory();
|
||||
sysinfo_lock.refresh_cpu_specifics(sysinfo::CpuRefreshKind::everything());
|
||||
sysinfo_lock.refresh_cpu();
|
||||
sysinfo_lock.refresh_system();
|
||||
sysinfo_lock.refresh_networks();
|
||||
sysinfo_lock.refresh_disks();
|
||||
} // end lock
|
||||
sysinfo
|
||||
});
|
||||
|
||||
let app_start = std::time::Instant::now();
|
||||
let app_start_filter = warp::any().map(move || app_start);
|
||||
|
||||
/*
|
||||
*
|
||||
* Start of HTTP method definitions.
|
||||
@ -2822,6 +2863,29 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
})
|
||||
});
|
||||
|
||||
// GET lighthouse/ui/health
|
||||
let get_lighthouse_ui_health = warp::path("lighthouse")
|
||||
.and(warp::path("ui"))
|
||||
.and(warp::path("health"))
|
||||
.and(warp::path::end())
|
||||
.and(system_info_filter)
|
||||
.and(app_start_filter)
|
||||
.and(data_dir_filter)
|
||||
.and(network_globals.clone())
|
||||
.and_then(
|
||||
|sysinfo, app_start: std::time::Instant, data_dir, network_globals| {
|
||||
blocking_json_task(move || {
|
||||
let app_uptime = app_start.elapsed().as_secs() as u64;
|
||||
Ok(api_types::GenericResponse::from(observe_system_health_bn(
|
||||
sysinfo,
|
||||
data_dir,
|
||||
app_uptime,
|
||||
network_globals,
|
||||
)))
|
||||
})
|
||||
},
|
||||
);
|
||||
|
||||
// GET lighthouse/syncing
|
||||
let get_lighthouse_syncing = warp::path("lighthouse")
|
||||
.and(warp::path("syncing"))
|
||||
@ -3271,6 +3335,7 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
.or(get_validator_aggregate_attestation.boxed())
|
||||
.or(get_validator_sync_committee_contribution.boxed())
|
||||
.or(get_lighthouse_health.boxed())
|
||||
.or(get_lighthouse_ui_health.boxed())
|
||||
.or(get_lighthouse_syncing.boxed())
|
||||
.or(get_lighthouse_nat.boxed())
|
||||
.or(get_lighthouse_peers.boxed())
|
||||
|
@ -2,6 +2,7 @@ use beacon_chain::{
|
||||
test_utils::{BeaconChainHarness, EphemeralHarnessType},
|
||||
BeaconChain, BeaconChainTypes,
|
||||
};
|
||||
use directory::DEFAULT_ROOT_DIR;
|
||||
use eth2::{BeaconNodeHttpClient, Timeouts};
|
||||
use http_api::{Config, Context};
|
||||
use lighthouse_network::{
|
||||
@ -142,6 +143,7 @@ pub async fn create_api_server_on_port<T: BeaconChainTypes>(
|
||||
allow_origin: None,
|
||||
tls_config: None,
|
||||
allow_sync_stalled: false,
|
||||
data_dir: std::path::PathBuf::from(DEFAULT_ROOT_DIR),
|
||||
spec_fork_name: None,
|
||||
},
|
||||
chain: Some(chain.clone()),
|
||||
|
@ -8,7 +8,6 @@ use libp2p::gossipsub::subscription_filter::{
|
||||
};
|
||||
use libp2p::gossipsub::Gossipsub as BaseGossipsub;
|
||||
use libp2p::identify::Identify;
|
||||
use libp2p::swarm::NetworkBehaviour;
|
||||
use libp2p::NetworkBehaviour;
|
||||
use types::EthSpec;
|
||||
|
||||
|
@ -34,13 +34,13 @@ pub fn get_config<E: EthSpec>(
|
||||
let spec = &context.eth2_config.spec;
|
||||
let log = context.log();
|
||||
|
||||
let mut client_config = ClientConfig {
|
||||
data_dir: get_data_dir(cli_args),
|
||||
..Default::default()
|
||||
};
|
||||
let mut client_config = ClientConfig::default();
|
||||
|
||||
// Update the client's data directory
|
||||
client_config.set_data_dir(get_data_dir(cli_args));
|
||||
|
||||
// If necessary, remove any existing database and configuration
|
||||
if client_config.data_dir.exists() && cli_args.is_present("purge-db") {
|
||||
if client_config.data_dir().exists() && cli_args.is_present("purge-db") {
|
||||
// Remove the chain_db.
|
||||
let chain_db = client_config.get_db_path();
|
||||
if chain_db.exists() {
|
||||
@ -57,11 +57,11 @@ pub fn get_config<E: EthSpec>(
|
||||
}
|
||||
|
||||
// Create `datadir` and any non-existing parent directories.
|
||||
fs::create_dir_all(&client_config.data_dir)
|
||||
fs::create_dir_all(client_config.data_dir())
|
||||
.map_err(|e| format!("Failed to create data dir: {}", e))?;
|
||||
|
||||
// logs the chosen data directory
|
||||
let mut log_dir = client_config.data_dir.clone();
|
||||
let mut log_dir = client_config.data_dir().clone();
|
||||
// remove /beacon from the end
|
||||
log_dir.pop();
|
||||
info!(log, "Data directory initialised"; "datadir" => log_dir.into_os_string().into_string().expect("Datadir should be a valid os string"));
|
||||
@ -69,10 +69,13 @@ pub fn get_config<E: EthSpec>(
|
||||
/*
|
||||
* Networking
|
||||
*/
|
||||
|
||||
let data_dir_ref = client_config.data_dir().clone();
|
||||
|
||||
set_network_config(
|
||||
&mut client_config.network,
|
||||
cli_args,
|
||||
&client_config.data_dir,
|
||||
&data_dir_ref,
|
||||
log,
|
||||
false,
|
||||
)?;
|
||||
@ -303,7 +306,7 @@ pub fn get_config<E: EthSpec>(
|
||||
} else if let Some(jwt_secret_key) = cli_args.value_of("execution-jwt-secret-key") {
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
secret_file = client_config.data_dir.join(DEFAULT_JWT_FILE);
|
||||
secret_file = client_config.data_dir().join(DEFAULT_JWT_FILE);
|
||||
let mut jwt_secret_key_file = File::create(secret_file.clone())
|
||||
.map_err(|e| format!("Error while creating jwt_secret_key file: {:?}", e))?;
|
||||
jwt_secret_key_file
|
||||
@ -332,7 +335,7 @@ pub fn get_config<E: EthSpec>(
|
||||
clap_utils::parse_optional(cli_args, "suggested-fee-recipient")?;
|
||||
el_config.jwt_id = clap_utils::parse_optional(cli_args, "execution-jwt-id")?;
|
||||
el_config.jwt_version = clap_utils::parse_optional(cli_args, "execution-jwt-version")?;
|
||||
el_config.default_datadir = client_config.data_dir.clone();
|
||||
el_config.default_datadir = client_config.data_dir().clone();
|
||||
el_config.builder_profit_threshold =
|
||||
clap_utils::parse_required(cli_args, "builder-profit-threshold")?;
|
||||
let execution_timeout_multiplier =
|
||||
@ -573,7 +576,7 @@ pub fn get_config<E: EthSpec>(
|
||||
let slasher_dir = if let Some(slasher_dir) = cli_args.value_of("slasher-dir") {
|
||||
PathBuf::from(slasher_dir)
|
||||
} else {
|
||||
client_config.data_dir.join("slasher_db")
|
||||
client_config.data_dir().join("slasher_db")
|
||||
};
|
||||
|
||||
let mut slasher_config = slasher::Config::new(slasher_dir);
|
||||
|
@ -62,6 +62,43 @@ curl -X GET "http://localhost:5052/lighthouse/health" -H "accept: application/j
|
||||
|
||||
```
|
||||
|
||||
### `/lighthouse/ui/health`
|
||||
|
||||
|
||||
```bash
|
||||
curl -X GET "http://localhost:5052/lighthouse/ui/health" -H "accept: application/json" | jq
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"total_memory": 16443219968,
|
||||
"free_memory": 1283739648,
|
||||
"used_memory": 5586264064,
|
||||
"sys_loadavg_1": 0.59,
|
||||
"sys_loadavg_5": 1.13,
|
||||
"sys_loadavg_15": 2.41,
|
||||
"cpu_cores": 4,
|
||||
"cpu_threads": 8,
|
||||
"global_cpu_frequency": 3.4,
|
||||
"disk_bytes_total": 502390845440,
|
||||
"disk_bytes_free": 9981386752,
|
||||
"network_name": "wlp0s20f3",
|
||||
"network_bytes_total_received": 14105556611,
|
||||
"network_bytes_total_transmit": 3649489389,
|
||||
"nat_open": true,
|
||||
"connected_peers": 80,
|
||||
"sync_state": "Synced",
|
||||
"system_uptime": 660706,
|
||||
"app_uptime": 105,
|
||||
"system_name": "Arch Linux",
|
||||
"kernel_version": "5.19.13-arch1-1",
|
||||
"os_version": "Linux rolling Arch Linux",
|
||||
"host_name": "Computer1"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `/lighthouse/syncing`
|
||||
|
||||
```bash
|
||||
|
@ -6,6 +6,7 @@ HTTP Path | Description |
|
||||
| --- | -- |
|
||||
[`GET /lighthouse/version`](#get-lighthouseversion) | Get the Lighthouse software version.
|
||||
[`GET /lighthouse/health`](#get-lighthousehealth) | Get information about the host machine.
|
||||
[`GET /lighthouse/ui/health`](#get-lighthouseuihealth) | Get information about the host machine. Intended for UI applications.
|
||||
[`GET /lighthouse/spec`](#get-lighthousespec) | Get the Ethereum proof-of-stake consensus specification used by the validator.
|
||||
[`GET /lighthouse/auth`](#get-lighthouseauth) | Get the location of the authorization token.
|
||||
[`GET /lighthouse/validators`](#get-lighthousevalidators) | List all validators.
|
||||
@ -77,6 +78,45 @@ Returns information regarding the health of the host machine.
|
||||
}
|
||||
```
|
||||
|
||||
## `GET /lighthouse/ui/health`
|
||||
|
||||
Returns information regarding the health of the host machine, in a format intended for UI applications.
|
||||
|
||||
### HTTP Specification
|
||||
|
||||
| Property | Specification |
|
||||
|-------------------|--------------------------------------------|
|
||||
| Path | `/lighthouse/ui/health` |
|
||||
| Method | GET |
|
||||
| Required Headers | [`Authorization`](./api-vc-auth-header.md) |
|
||||
| Typical Responses | 200 |
|
||||
|
||||
### Example Response Body
|
||||
|
||||
```json
|
||||
{
|
||||
"data": {
|
||||
"total_memory": 16443219968,
|
||||
"free_memory": 1283739648,
|
||||
"used_memory": 5586264064,
|
||||
"sys_loadavg_1": 0.59,
|
||||
"sys_loadavg_5": 1.13,
|
||||
"sys_loadavg_15": 2.41,
|
||||
"cpu_cores": 4,
|
||||
"cpu_threads": 8,
|
||||
"global_cpu_frequency": 3.4,
|
||||
"disk_bytes_total": 502390845440,
|
||||
"disk_bytes_free": 9981386752,
|
||||
"system_uptime": 660706,
|
||||
"app_uptime": 105,
|
||||
"system_name": "Arch Linux",
|
||||
"kernel_version": "5.19.13-arch1-1",
|
||||
"os_version": "Linux rolling Arch Linux",
|
||||
"host_name": "Computer1"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## `GET /lighthouse/spec`
|
||||
|
||||
Returns the Ethereum proof-of-stake consensus specification loaded for this validator.
|
||||
|
13
common/system_health/Cargo.toml
Normal file
13
common/system_health/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "system_health"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
lighthouse_network = { path = "../../beacon_node/lighthouse_network" }
|
||||
types = { path = "../../consensus/types" }
|
||||
sysinfo = "0.26.5"
|
||||
serde = "1.0.116"
|
||||
serde_derive = "1.0.116"
|
||||
serde_json = "1.0.58"
|
||||
parking_lot = "0.12.0"
|
241
common/system_health/src/lib.rs
Normal file
241
common/system_health/src/lib.rs
Normal file
@ -0,0 +1,241 @@
|
||||
use lighthouse_network::{types::SyncState, NetworkGlobals};
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use sysinfo::{CpuExt, DiskExt, NetworkExt, NetworksExt, System, SystemExt};
|
||||
use types::EthSpec;
|
||||
|
||||
/// Host-level health metrics shared by the beacon node and validator client
/// `lighthouse/ui/health` endpoints.
///
/// Populated by `observe_system_health` and embedded (flattened) into
/// `SystemHealthVC` and `SystemHealthBN`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct SystemHealth {
    /// Total memory of the system, as reported by `sysinfo`.
    // NOTE(review): presumably bytes (the documented example values suggest so) —
    // confirm against the pinned `sysinfo` version. Applies to all memory fields.
    pub total_memory: u64,
    /// Total free memory available to the system.
    pub free_memory: u64,
    /// Total used memory.
    pub used_memory: u64,

    /// System load average over 1 minute.
    pub sys_loadavg_1: f64,
    /// System load average over 5 minutes.
    pub sys_loadavg_5: f64,
    /// System load average over 15 minutes.
    pub sys_loadavg_15: f64,

    /// Total cpu cores (physical core count; `0` when it cannot be determined).
    pub cpu_cores: usize,
    /// Total cpu threads (the number of CPUs listed by `sysinfo`).
    pub cpu_threads: usize,
    /// The global cpu frequency in GHz. Parsed from the CPU brand string when it
    /// contains a `GHz` figure, otherwise derived from the measured frequencies.
    pub global_cpu_frequency: f32,

    /// Total capacity of the disk chosen for the data directory (falls back to
    /// the root file system, then to the sum of all known disks).
    pub disk_bytes_total: u64,
    /// Free space on that same disk (or disks, when summed).
    pub disk_bytes_free: u64,

    /// System uptime, as reported by `sysinfo`.
    // NOTE(review): presumably seconds — confirm against the `sysinfo` docs.
    pub system_uptime: u64,
    /// Application uptime, supplied by the caller (the HTTP servers pass whole seconds).
    pub app_uptime: u64,
    /// The System name (empty when unavailable).
    pub system_name: String,
    /// Kernel version (empty when unavailable).
    pub kernel_version: String,
    /// OS version (empty when unavailable).
    pub os_version: String,
    /// Hostname (empty when unavailable).
    pub host_name: String,
}
|
||||
|
||||
/// System related health, specific to the UI for the validator client.
///
/// Currently carries only the shared `SystemHealth` metrics; they are flattened
/// so the serialized form has no nested `system_health` object.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct SystemHealthVC {
    /// The host-level metrics, serialized inline with this struct's own fields.
    #[serde(flatten)]
    pub system_health: SystemHealth,
}
|
||||
|
||||
/// System related health, specific to the UI for the Beacon Node.
///
/// Extends the shared `SystemHealth` metrics (flattened into the serialized
/// form) with network-interface and p2p information.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct SystemHealthBN {
    /// The host-level metrics, serialized inline with this struct's own fields.
    #[serde(flatten)]
    pub system_health: SystemHealth,
    /// The name of the network that uses the most traffic (chosen as the
    /// interface with the most total bytes received; `"None"` when no
    /// interface is known).
    pub network_name: String,
    /// Total bytes received over the main interface.
    pub network_bytes_total_received: u64,
    /// Total bytes sent over the main interface.
    pub network_bytes_total_transmit: u64,

    /// The current NAT status (`true` when the `NAT_OPEN` metric is non-zero).
    pub nat_open: bool,
    /// The current number of connected peers.
    pub connected_peers: usize,
    /// The current syncing state of the consensus node.
    pub sync_state: SyncState,
}
|
||||
|
||||
/// Populates the system health.
|
||||
fn observe_system_health(
|
||||
sysinfo: Arc<RwLock<System>>,
|
||||
data_dir: PathBuf,
|
||||
app_uptime: u64,
|
||||
) -> SystemHealth {
|
||||
let sysinfo = sysinfo.read();
|
||||
let loadavg = sysinfo.load_average();
|
||||
|
||||
let cpus = sysinfo.cpus();
|
||||
|
||||
let disks = sysinfo.disks();
|
||||
|
||||
let system_uptime = sysinfo.uptime();
|
||||
|
||||
// Helper functions to extract specific data
|
||||
|
||||
// Find fs associated with the data dir location and report this
|
||||
let (disk_bytes_total, disk_bytes_free) = {
|
||||
// There is no clean way to find this in an OS-agnostic way. We take a simple approach,
|
||||
// which is attempt to match the mount_point to the data_dir. If this cannot be done, we
|
||||
// just fallback to the root fs.
|
||||
|
||||
let mut root_fs_disk = None;
|
||||
let mut other_matching_fs = None;
|
||||
|
||||
for disk in disks.iter() {
|
||||
if disk.mount_point() == Path::new("/")
|
||||
|| disk.mount_point() == Path::new("C:\\")
|
||||
|| disk.mount_point() == Path::new("/System/Volumes/Data")
|
||||
{
|
||||
// Found the usual default root_fs
|
||||
root_fs_disk = Some(disk);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we have other file systems, compare these to the data_dir of Lighthouse and
|
||||
// prioritize these.
|
||||
if data_dir
|
||||
.to_str()
|
||||
.map(|path| {
|
||||
if let Some(mount_str) = disk.mount_point().to_str() {
|
||||
path.contains(mount_str)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.unwrap_or(false)
|
||||
{
|
||||
other_matching_fs = Some(disk);
|
||||
break; // Don't bother finding other competing fs.
|
||||
}
|
||||
}
|
||||
|
||||
// If we found a file system other than the root, report this, otherwise just report the
|
||||
// root fs
|
||||
let fs = other_matching_fs.or(root_fs_disk);
|
||||
|
||||
// If the root fs is not known, just add up the total of all known partitions
|
||||
match fs {
|
||||
Some(fs) => (fs.total_space(), fs.available_space()),
|
||||
None => {
|
||||
// If we can't find a known partition, just add them all up
|
||||
disks.iter().fold((0, 0), |mut current_sizes, disk| {
|
||||
current_sizes.0 += disk.total_space();
|
||||
current_sizes.1 += disk.available_space();
|
||||
current_sizes
|
||||
})
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Attempt to get the clock speed from the name of the CPU
|
||||
let cpu_frequency_from_name = cpus.iter().next().and_then(|cpu| {
|
||||
cpu.brand()
|
||||
.split_once("GHz")
|
||||
.and_then(|(result, _)| result.trim().rsplit_once(' '))
|
||||
.and_then(|(_, result)| result.parse::<f32>().ok())
|
||||
});
|
||||
|
||||
let global_cpu_frequency = match cpu_frequency_from_name {
|
||||
Some(freq) => freq,
|
||||
None => {
|
||||
// Get the frequency from average measured frequencies
|
||||
let global_cpu_frequency: f32 =
|
||||
cpus.iter().map(|cpu| cpu.frequency()).sum::<u64>() as f32 / cpus.len() as f32;
|
||||
// Shift to ghz to 1dp
|
||||
(global_cpu_frequency / 100.0).round() / 10.0
|
||||
}
|
||||
};
|
||||
|
||||
SystemHealth {
|
||||
total_memory: sysinfo.total_memory(),
|
||||
free_memory: sysinfo.free_memory(),
|
||||
used_memory: sysinfo.used_memory(),
|
||||
sys_loadavg_1: loadavg.one,
|
||||
sys_loadavg_5: loadavg.five,
|
||||
sys_loadavg_15: loadavg.fifteen,
|
||||
cpu_cores: sysinfo.physical_core_count().unwrap_or(0),
|
||||
cpu_threads: cpus.len(),
|
||||
global_cpu_frequency,
|
||||
disk_bytes_total,
|
||||
disk_bytes_free,
|
||||
system_uptime,
|
||||
app_uptime,
|
||||
system_name: sysinfo.name().unwrap_or_else(|| String::from("")),
|
||||
kernel_version: sysinfo.kernel_version().unwrap_or_else(|| "".into()),
|
||||
os_version: sysinfo.long_os_version().unwrap_or_else(|| "".into()),
|
||||
host_name: sysinfo.host_name().unwrap_or_else(|| "".into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Observes the Validator client system health.
|
||||
pub fn observe_system_health_vc(
|
||||
sysinfo: Arc<RwLock<System>>,
|
||||
data_dir: PathBuf,
|
||||
app_uptime: u64,
|
||||
) -> SystemHealthVC {
|
||||
SystemHealthVC {
|
||||
system_health: observe_system_health(sysinfo, data_dir, app_uptime),
|
||||
}
|
||||
}
|
||||
|
||||
/// Observes the Beacon Node system health.
|
||||
pub fn observe_system_health_bn<TSpec: EthSpec>(
|
||||
sysinfo: Arc<RwLock<System>>,
|
||||
data_dir: PathBuf,
|
||||
app_uptime: u64,
|
||||
network_globals: Arc<NetworkGlobals<TSpec>>,
|
||||
) -> SystemHealthBN {
|
||||
let system_health = observe_system_health(sysinfo.clone(), data_dir, app_uptime);
|
||||
|
||||
// Find the network with the most traffic and assume this is the main network
|
||||
let sysinfo = sysinfo.read();
|
||||
let networks = sysinfo.networks();
|
||||
let (network_name, network_bytes_total_received, network_bytes_total_transmit) = networks
|
||||
.iter()
|
||||
.max_by_key(|(_name, network)| network.total_received())
|
||||
.map(|(name, network)| {
|
||||
(
|
||||
name.clone(),
|
||||
network.total_received(),
|
||||
network.total_transmitted(),
|
||||
)
|
||||
})
|
||||
.unwrap_or_else(|| (String::from("None"), 0, 0));
|
||||
|
||||
// Determine if the NAT is open or not.
|
||||
let nat_open = lighthouse_network::metrics::NAT_OPEN
|
||||
.as_ref()
|
||||
.map(|v| v.get())
|
||||
.unwrap_or(0)
|
||||
!= 0;
|
||||
|
||||
SystemHealthBN {
|
||||
system_health,
|
||||
network_name,
|
||||
network_bytes_total_received,
|
||||
network_bytes_total_transmit,
|
||||
nat_open,
|
||||
connected_peers: network_globals.connected_peers(),
|
||||
sync_state: network_globals.sync_state(),
|
||||
}
|
||||
}
|
@ -98,10 +98,9 @@ fn parse_client_config<E: EthSpec>(
|
||||
cli_args: &ArgMatches,
|
||||
_env: &Environment<E>,
|
||||
) -> Result<ClientConfig, String> {
|
||||
let mut client_config = ClientConfig {
|
||||
data_dir: get_data_dir(cli_args),
|
||||
..Default::default()
|
||||
};
|
||||
let mut client_config = ClientConfig::default();
|
||||
|
||||
client_config.set_data_dir(get_data_dir(cli_args));
|
||||
|
||||
if let Some(freezer_dir) = clap_utils::parse_optional(cli_args, "freezer-dir")? {
|
||||
client_config.freezer_db_path = Some(freezer_dir);
|
||||
|
@ -56,7 +56,9 @@ impl CommandLineTestExec for CommandLineTest {
|
||||
fn datadir_flag() {
|
||||
CommandLineTest::new()
|
||||
.run_with_zero_port()
|
||||
.with_config_and_dir(|config, dir| assert_eq!(config.data_dir, dir.path().join("beacon")));
|
||||
.with_config_and_dir(|config, dir| {
|
||||
assert_eq!(*config.data_dir(), dir.path().join("beacon"))
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -48,7 +48,7 @@ impl<E: EthSpec> LocalBeaconNode<E> {
|
||||
.tempdir()
|
||||
.expect("should create temp directory for client datadir");
|
||||
|
||||
client_config.data_dir = datadir.path().into();
|
||||
client_config.set_data_dir(datadir.path().into());
|
||||
client_config.network.network_dir = PathBuf::from(datadir.path()).join("network");
|
||||
|
||||
ProductionBeaconNode::new(context, client_config)
|
||||
|
@ -59,3 +59,6 @@ task_executor = { path = "../common/task_executor" }
|
||||
reqwest = { version = "0.11.0", features = ["json","stream"] }
|
||||
url = "2.2.2"
|
||||
malloc_utils = { path = "../common/malloc_utils" }
|
||||
sysinfo = "0.26.5"
|
||||
system_health = { path = "../common/system_health" }
|
||||
|
||||
|
@ -16,6 +16,7 @@ use eth2::lighthouse_vc::{
|
||||
types::{self as api_types, GenericResponse, PublicKey, PublicKeyBytes},
|
||||
};
|
||||
use lighthouse_version::version_with_platform;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slog::{crit, info, warn, Logger};
|
||||
use slot_clock::SlotClock;
|
||||
@ -24,6 +25,8 @@ use std::marker::PhantomData;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use sysinfo::{System, SystemExt};
|
||||
use system_health::observe_system_health_vc;
|
||||
use task_executor::TaskExecutor;
|
||||
use types::{ChainSpec, ConfigAndPreset, EthSpec};
|
||||
use validator_dir::Builder as ValidatorDirBuilder;
|
||||
@ -183,6 +186,35 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
|
||||
let api_token_path_inner = api_token_path.clone();
|
||||
let api_token_path_filter = warp::any().map(move || api_token_path_inner.clone());
|
||||
|
||||
// Create a `warp` filter that provides access to local system information.
|
||||
let system_info = Arc::new(RwLock::new(sysinfo::System::new()));
|
||||
{
|
||||
// grab write access for initialisation
|
||||
let mut system_info = system_info.write();
|
||||
system_info.refresh_disks_list();
|
||||
system_info.refresh_networks_list();
|
||||
} // end lock
|
||||
|
||||
let system_info_filter =
|
||||
warp::any()
|
||||
.map(move || system_info.clone())
|
||||
.map(|sysinfo: Arc<RwLock<System>>| {
|
||||
{
|
||||
// refresh stats
|
||||
let mut sysinfo_lock = sysinfo.write();
|
||||
sysinfo_lock.refresh_memory();
|
||||
sysinfo_lock.refresh_cpu_specifics(sysinfo::CpuRefreshKind::everything());
|
||||
sysinfo_lock.refresh_cpu();
|
||||
sysinfo_lock.refresh_system();
|
||||
sysinfo_lock.refresh_networks();
|
||||
sysinfo_lock.refresh_disks();
|
||||
} // end lock
|
||||
sysinfo
|
||||
});
|
||||
|
||||
let app_start = std::time::Instant::now();
|
||||
let app_start_filter = warp::any().map(move || app_start);
|
||||
|
||||
// GET lighthouse/version
|
||||
let get_node_version = warp::path("lighthouse")
|
||||
.and(warp::path("version"))
|
||||
@ -279,6 +311,24 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
|
||||
},
|
||||
);
|
||||
|
||||
// GET lighthouse/ui/health
|
||||
let get_lighthouse_ui_health = warp::path("lighthouse")
|
||||
.and(warp::path("ui"))
|
||||
.and(warp::path("health"))
|
||||
.and(warp::path::end())
|
||||
.and(system_info_filter)
|
||||
.and(app_start_filter)
|
||||
.and(validator_dir_filter.clone())
|
||||
.and(signer.clone())
|
||||
.and_then(|sysinfo, app_start: std::time::Instant, val_dir, signer| {
|
||||
blocking_signed_json_task(signer, move || {
|
||||
let app_uptime = app_start.elapsed().as_secs() as u64;
|
||||
Ok(api_types::GenericResponse::from(observe_system_health_vc(
|
||||
sysinfo, val_dir, app_uptime,
|
||||
)))
|
||||
})
|
||||
});
|
||||
|
||||
// POST lighthouse/validators/
|
||||
let post_validators = warp::path("lighthouse")
|
||||
.and(warp::path("validators"))
|
||||
@ -894,6 +944,7 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
|
||||
.or(get_lighthouse_spec)
|
||||
.or(get_lighthouse_validators)
|
||||
.or(get_lighthouse_validators_pubkey)
|
||||
.or(get_lighthouse_ui_health)
|
||||
.or(get_fee_recipient)
|
||||
.or(get_gas_limit)
|
||||
.or(get_std_keystores)
|
||||
|
Loading…
Reference in New Issue
Block a user