Health Endpoints for UI (#3668)
This PR adds some health endpoints for the beacon node and the validator client. Specifically, it adds the endpoint `/lighthouse/ui/health`. These endpoints are not entirely stable yet, but they provide a base for modification for our UI. They may also have issues on various platforms and may need modification.
This commit is contained in:
parent
9bd6d9ce7a
commit
230168deff
948
Cargo.lock
generated
948
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -37,6 +37,7 @@ members = [
|
|||||||
"common/oneshot_broadcast",
|
"common/oneshot_broadcast",
|
||||||
"common/sensitive_url",
|
"common/sensitive_url",
|
||||||
"common/slot_clock",
|
"common/slot_clock",
|
||||||
|
"common/system_health",
|
||||||
"common/task_executor",
|
"common/task_executor",
|
||||||
"common/target_check",
|
"common/target_check",
|
||||||
"common/test_random_derive",
|
"common/test_random_derive",
|
||||||
|
@ -42,7 +42,7 @@ pub enum ClientGenesis {
|
|||||||
/// The core configuration of a Lighthouse beacon node.
|
/// The core configuration of a Lighthouse beacon node.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub data_dir: PathBuf,
|
data_dir: PathBuf,
|
||||||
/// Name of the directory inside the data directory where the main "hot" DB is located.
|
/// Name of the directory inside the data directory where the main "hot" DB is located.
|
||||||
pub db_name: String,
|
pub db_name: String,
|
||||||
/// Path where the freezer database will be located.
|
/// Path where the freezer database will be located.
|
||||||
@ -103,6 +103,17 @@ impl Default for Config {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
|
/// Updates the data directory for the Client.
|
||||||
|
pub fn set_data_dir(&mut self, data_dir: PathBuf) {
|
||||||
|
self.data_dir = data_dir.clone();
|
||||||
|
self.http_api.data_dir = data_dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the config's data_dir.
|
||||||
|
pub fn data_dir(&self) -> &PathBuf {
|
||||||
|
&self.data_dir
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the database path without initialising it.
|
/// Get the database path without initialising it.
|
||||||
pub fn get_db_path(&self) -> PathBuf {
|
pub fn get_db_path(&self) -> PathBuf {
|
||||||
self.get_data_dir().join(&self.db_name)
|
self.get_data_dir().join(&self.db_name)
|
||||||
|
@ -33,6 +33,9 @@ safe_arith = {path = "../../consensus/safe_arith"}
|
|||||||
task_executor = { path = "../../common/task_executor" }
|
task_executor = { path = "../../common/task_executor" }
|
||||||
lru = "0.7.7"
|
lru = "0.7.7"
|
||||||
tree_hash = "0.4.1"
|
tree_hash = "0.4.1"
|
||||||
|
sysinfo = "0.26.5"
|
||||||
|
system_health = { path = "../../common/system_health" }
|
||||||
|
directory = { path = "../../common/directory" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
store = { path = "../store" }
|
store = { path = "../store" }
|
||||||
|
@ -26,12 +26,14 @@ use beacon_chain::{
|
|||||||
BeaconChainTypes, ProduceBlockVerification, WhenSlotSkipped,
|
BeaconChainTypes, ProduceBlockVerification, WhenSlotSkipped,
|
||||||
};
|
};
|
||||||
pub use block_id::BlockId;
|
pub use block_id::BlockId;
|
||||||
|
use directory::DEFAULT_ROOT_DIR;
|
||||||
use eth2::types::{
|
use eth2::types::{
|
||||||
self as api_types, EndpointVersion, SkipRandaoVerification, ValidatorId, ValidatorStatus,
|
self as api_types, EndpointVersion, SkipRandaoVerification, ValidatorId, ValidatorStatus,
|
||||||
};
|
};
|
||||||
use lighthouse_network::{types::SyncState, EnrExt, NetworkGlobals, PeerId, PubsubMessage};
|
use lighthouse_network::{types::SyncState, EnrExt, NetworkGlobals, PeerId, PubsubMessage};
|
||||||
use lighthouse_version::version_with_platform;
|
use lighthouse_version::version_with_platform;
|
||||||
use network::{NetworkMessage, NetworkSenders, ValidatorSubscriptionMessage};
|
use network::{NetworkMessage, NetworkSenders, ValidatorSubscriptionMessage};
|
||||||
|
use parking_lot::RwLock;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use slog::{crit, debug, error, info, warn, Logger};
|
use slog::{crit, debug, error, info, warn, Logger};
|
||||||
use slot_clock::SlotClock;
|
use slot_clock::SlotClock;
|
||||||
@ -43,6 +45,8 @@ use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use sysinfo::{System, SystemExt};
|
||||||
|
use system_health::observe_system_health_bn;
|
||||||
use tokio::sync::mpsc::{Sender, UnboundedSender};
|
use tokio::sync::mpsc::{Sender, UnboundedSender};
|
||||||
use tokio_stream::{wrappers::BroadcastStream, StreamExt};
|
use tokio_stream::{wrappers::BroadcastStream, StreamExt};
|
||||||
use types::{
|
use types::{
|
||||||
@ -110,6 +114,7 @@ pub struct Config {
|
|||||||
pub tls_config: Option<TlsConfig>,
|
pub tls_config: Option<TlsConfig>,
|
||||||
pub allow_sync_stalled: bool,
|
pub allow_sync_stalled: bool,
|
||||||
pub spec_fork_name: Option<ForkName>,
|
pub spec_fork_name: Option<ForkName>,
|
||||||
|
pub data_dir: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
@ -122,6 +127,7 @@ impl Default for Config {
|
|||||||
tls_config: None,
|
tls_config: None,
|
||||||
allow_sync_stalled: false,
|
allow_sync_stalled: false,
|
||||||
spec_fork_name: None,
|
spec_fork_name: None,
|
||||||
|
data_dir: PathBuf::from(DEFAULT_ROOT_DIR),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -323,6 +329,10 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Create a `warp` filter for the data_dir.
|
||||||
|
let inner_data_dir = ctx.config.data_dir.clone();
|
||||||
|
let data_dir_filter = warp::any().map(move || inner_data_dir.clone());
|
||||||
|
|
||||||
// Create a `warp` filter that provides access to the beacon chain.
|
// Create a `warp` filter that provides access to the beacon chain.
|
||||||
let inner_ctx = ctx.clone();
|
let inner_ctx = ctx.clone();
|
||||||
let chain_filter =
|
let chain_filter =
|
||||||
@ -431,6 +441,37 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
let inner_ctx = ctx.clone();
|
let inner_ctx = ctx.clone();
|
||||||
let log_filter = warp::any().map(move || inner_ctx.log.clone());
|
let log_filter = warp::any().map(move || inner_ctx.log.clone());
|
||||||
|
|
||||||
|
// Create a `warp` filter that provides access to local system information.
|
||||||
|
let system_info = Arc::new(RwLock::new(sysinfo::System::new()));
|
||||||
|
{
|
||||||
|
// grab write access for initialisation
|
||||||
|
let mut system_info = system_info.write();
|
||||||
|
system_info.refresh_disks_list();
|
||||||
|
system_info.refresh_networks_list();
|
||||||
|
system_info.refresh_cpu_specifics(sysinfo::CpuRefreshKind::everything());
|
||||||
|
system_info.refresh_cpu();
|
||||||
|
} // end lock
|
||||||
|
|
||||||
|
let system_info_filter =
|
||||||
|
warp::any()
|
||||||
|
.map(move || system_info.clone())
|
||||||
|
.map(|sysinfo: Arc<RwLock<System>>| {
|
||||||
|
{
|
||||||
|
// refresh stats
|
||||||
|
let mut sysinfo_lock = sysinfo.write();
|
||||||
|
sysinfo_lock.refresh_memory();
|
||||||
|
sysinfo_lock.refresh_cpu_specifics(sysinfo::CpuRefreshKind::everything());
|
||||||
|
sysinfo_lock.refresh_cpu();
|
||||||
|
sysinfo_lock.refresh_system();
|
||||||
|
sysinfo_lock.refresh_networks();
|
||||||
|
sysinfo_lock.refresh_disks();
|
||||||
|
} // end lock
|
||||||
|
sysinfo
|
||||||
|
});
|
||||||
|
|
||||||
|
let app_start = std::time::Instant::now();
|
||||||
|
let app_start_filter = warp::any().map(move || app_start);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
* Start of HTTP method definitions.
|
* Start of HTTP method definitions.
|
||||||
@ -2822,6 +2863,29 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// GET lighthouse/ui/health
|
||||||
|
let get_lighthouse_ui_health = warp::path("lighthouse")
|
||||||
|
.and(warp::path("ui"))
|
||||||
|
.and(warp::path("health"))
|
||||||
|
.and(warp::path::end())
|
||||||
|
.and(system_info_filter)
|
||||||
|
.and(app_start_filter)
|
||||||
|
.and(data_dir_filter)
|
||||||
|
.and(network_globals.clone())
|
||||||
|
.and_then(
|
||||||
|
|sysinfo, app_start: std::time::Instant, data_dir, network_globals| {
|
||||||
|
blocking_json_task(move || {
|
||||||
|
let app_uptime = app_start.elapsed().as_secs() as u64;
|
||||||
|
Ok(api_types::GenericResponse::from(observe_system_health_bn(
|
||||||
|
sysinfo,
|
||||||
|
data_dir,
|
||||||
|
app_uptime,
|
||||||
|
network_globals,
|
||||||
|
)))
|
||||||
|
})
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
// GET lighthouse/syncing
|
// GET lighthouse/syncing
|
||||||
let get_lighthouse_syncing = warp::path("lighthouse")
|
let get_lighthouse_syncing = warp::path("lighthouse")
|
||||||
.and(warp::path("syncing"))
|
.and(warp::path("syncing"))
|
||||||
@ -3271,6 +3335,7 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
.or(get_validator_aggregate_attestation.boxed())
|
.or(get_validator_aggregate_attestation.boxed())
|
||||||
.or(get_validator_sync_committee_contribution.boxed())
|
.or(get_validator_sync_committee_contribution.boxed())
|
||||||
.or(get_lighthouse_health.boxed())
|
.or(get_lighthouse_health.boxed())
|
||||||
|
.or(get_lighthouse_ui_health.boxed())
|
||||||
.or(get_lighthouse_syncing.boxed())
|
.or(get_lighthouse_syncing.boxed())
|
||||||
.or(get_lighthouse_nat.boxed())
|
.or(get_lighthouse_nat.boxed())
|
||||||
.or(get_lighthouse_peers.boxed())
|
.or(get_lighthouse_peers.boxed())
|
||||||
|
@ -2,6 +2,7 @@ use beacon_chain::{
|
|||||||
test_utils::{BeaconChainHarness, EphemeralHarnessType},
|
test_utils::{BeaconChainHarness, EphemeralHarnessType},
|
||||||
BeaconChain, BeaconChainTypes,
|
BeaconChain, BeaconChainTypes,
|
||||||
};
|
};
|
||||||
|
use directory::DEFAULT_ROOT_DIR;
|
||||||
use eth2::{BeaconNodeHttpClient, Timeouts};
|
use eth2::{BeaconNodeHttpClient, Timeouts};
|
||||||
use http_api::{Config, Context};
|
use http_api::{Config, Context};
|
||||||
use lighthouse_network::{
|
use lighthouse_network::{
|
||||||
@ -142,6 +143,7 @@ pub async fn create_api_server_on_port<T: BeaconChainTypes>(
|
|||||||
allow_origin: None,
|
allow_origin: None,
|
||||||
tls_config: None,
|
tls_config: None,
|
||||||
allow_sync_stalled: false,
|
allow_sync_stalled: false,
|
||||||
|
data_dir: std::path::PathBuf::from(DEFAULT_ROOT_DIR),
|
||||||
spec_fork_name: None,
|
spec_fork_name: None,
|
||||||
},
|
},
|
||||||
chain: Some(chain.clone()),
|
chain: Some(chain.clone()),
|
||||||
|
@ -8,7 +8,6 @@ use libp2p::gossipsub::subscription_filter::{
|
|||||||
};
|
};
|
||||||
use libp2p::gossipsub::Gossipsub as BaseGossipsub;
|
use libp2p::gossipsub::Gossipsub as BaseGossipsub;
|
||||||
use libp2p::identify::Identify;
|
use libp2p::identify::Identify;
|
||||||
use libp2p::swarm::NetworkBehaviour;
|
|
||||||
use libp2p::NetworkBehaviour;
|
use libp2p::NetworkBehaviour;
|
||||||
use types::EthSpec;
|
use types::EthSpec;
|
||||||
|
|
||||||
|
@ -34,13 +34,13 @@ pub fn get_config<E: EthSpec>(
|
|||||||
let spec = &context.eth2_config.spec;
|
let spec = &context.eth2_config.spec;
|
||||||
let log = context.log();
|
let log = context.log();
|
||||||
|
|
||||||
let mut client_config = ClientConfig {
|
let mut client_config = ClientConfig::default();
|
||||||
data_dir: get_data_dir(cli_args),
|
|
||||||
..Default::default()
|
// Update the client's data directory
|
||||||
};
|
client_config.set_data_dir(get_data_dir(cli_args));
|
||||||
|
|
||||||
// If necessary, remove any existing database and configuration
|
// If necessary, remove any existing database and configuration
|
||||||
if client_config.data_dir.exists() && cli_args.is_present("purge-db") {
|
if client_config.data_dir().exists() && cli_args.is_present("purge-db") {
|
||||||
// Remove the chain_db.
|
// Remove the chain_db.
|
||||||
let chain_db = client_config.get_db_path();
|
let chain_db = client_config.get_db_path();
|
||||||
if chain_db.exists() {
|
if chain_db.exists() {
|
||||||
@ -57,11 +57,11 @@ pub fn get_config<E: EthSpec>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create `datadir` and any non-existing parent directories.
|
// Create `datadir` and any non-existing parent directories.
|
||||||
fs::create_dir_all(&client_config.data_dir)
|
fs::create_dir_all(client_config.data_dir())
|
||||||
.map_err(|e| format!("Failed to create data dir: {}", e))?;
|
.map_err(|e| format!("Failed to create data dir: {}", e))?;
|
||||||
|
|
||||||
// logs the chosen data directory
|
// logs the chosen data directory
|
||||||
let mut log_dir = client_config.data_dir.clone();
|
let mut log_dir = client_config.data_dir().clone();
|
||||||
// remove /beacon from the end
|
// remove /beacon from the end
|
||||||
log_dir.pop();
|
log_dir.pop();
|
||||||
info!(log, "Data directory initialised"; "datadir" => log_dir.into_os_string().into_string().expect("Datadir should be a valid os string"));
|
info!(log, "Data directory initialised"; "datadir" => log_dir.into_os_string().into_string().expect("Datadir should be a valid os string"));
|
||||||
@ -69,10 +69,13 @@ pub fn get_config<E: EthSpec>(
|
|||||||
/*
|
/*
|
||||||
* Networking
|
* Networking
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
let data_dir_ref = client_config.data_dir().clone();
|
||||||
|
|
||||||
set_network_config(
|
set_network_config(
|
||||||
&mut client_config.network,
|
&mut client_config.network,
|
||||||
cli_args,
|
cli_args,
|
||||||
&client_config.data_dir,
|
&data_dir_ref,
|
||||||
log,
|
log,
|
||||||
false,
|
false,
|
||||||
)?;
|
)?;
|
||||||
@ -303,7 +306,7 @@ pub fn get_config<E: EthSpec>(
|
|||||||
} else if let Some(jwt_secret_key) = cli_args.value_of("execution-jwt-secret-key") {
|
} else if let Some(jwt_secret_key) = cli_args.value_of("execution-jwt-secret-key") {
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
secret_file = client_config.data_dir.join(DEFAULT_JWT_FILE);
|
secret_file = client_config.data_dir().join(DEFAULT_JWT_FILE);
|
||||||
let mut jwt_secret_key_file = File::create(secret_file.clone())
|
let mut jwt_secret_key_file = File::create(secret_file.clone())
|
||||||
.map_err(|e| format!("Error while creating jwt_secret_key file: {:?}", e))?;
|
.map_err(|e| format!("Error while creating jwt_secret_key file: {:?}", e))?;
|
||||||
jwt_secret_key_file
|
jwt_secret_key_file
|
||||||
@ -332,7 +335,7 @@ pub fn get_config<E: EthSpec>(
|
|||||||
clap_utils::parse_optional(cli_args, "suggested-fee-recipient")?;
|
clap_utils::parse_optional(cli_args, "suggested-fee-recipient")?;
|
||||||
el_config.jwt_id = clap_utils::parse_optional(cli_args, "execution-jwt-id")?;
|
el_config.jwt_id = clap_utils::parse_optional(cli_args, "execution-jwt-id")?;
|
||||||
el_config.jwt_version = clap_utils::parse_optional(cli_args, "execution-jwt-version")?;
|
el_config.jwt_version = clap_utils::parse_optional(cli_args, "execution-jwt-version")?;
|
||||||
el_config.default_datadir = client_config.data_dir.clone();
|
el_config.default_datadir = client_config.data_dir().clone();
|
||||||
el_config.builder_profit_threshold =
|
el_config.builder_profit_threshold =
|
||||||
clap_utils::parse_required(cli_args, "builder-profit-threshold")?;
|
clap_utils::parse_required(cli_args, "builder-profit-threshold")?;
|
||||||
let execution_timeout_multiplier =
|
let execution_timeout_multiplier =
|
||||||
@ -573,7 +576,7 @@ pub fn get_config<E: EthSpec>(
|
|||||||
let slasher_dir = if let Some(slasher_dir) = cli_args.value_of("slasher-dir") {
|
let slasher_dir = if let Some(slasher_dir) = cli_args.value_of("slasher-dir") {
|
||||||
PathBuf::from(slasher_dir)
|
PathBuf::from(slasher_dir)
|
||||||
} else {
|
} else {
|
||||||
client_config.data_dir.join("slasher_db")
|
client_config.data_dir().join("slasher_db")
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut slasher_config = slasher::Config::new(slasher_dir);
|
let mut slasher_config = slasher::Config::new(slasher_dir);
|
||||||
|
@ -62,6 +62,43 @@ curl -X GET "http://localhost:5052/lighthouse/health" -H "accept: application/j
|
|||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `/lighthouse/ui/health`
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X GET "http://localhost:5052/lighthouse/ui/health" -H "accept: application/json" | jq
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"total_memory": 16443219968,
|
||||||
|
"free_memory": 1283739648,
|
||||||
|
"used_memory": 5586264064,
|
||||||
|
"sys_loadavg_1": 0.59,
|
||||||
|
"sys_loadavg_5": 1.13,
|
||||||
|
"sys_loadavg_15": 2.41,
|
||||||
|
"cpu_cores": 4,
|
||||||
|
"cpu_threads": 8,
|
||||||
|
"global_cpu_frequency": 3.4,
|
||||||
|
"disk_bytes_total": 502390845440,
|
||||||
|
"disk_bytes_free": 9981386752,
|
||||||
|
"network_name": "wlp0s20f3",
|
||||||
|
"network_bytes_total_received": 14105556611,
|
||||||
|
"network_bytes_total_transmit": 3649489389,
|
||||||
|
"nat_open": true,
|
||||||
|
"connected_peers": 80,
|
||||||
|
"sync_state": "Synced",
|
||||||
|
"system_uptime": 660706,
|
||||||
|
"app_uptime": 105,
|
||||||
|
"system_name": "Arch Linux",
|
||||||
|
"kernel_version": "5.19.13-arch1-1",
|
||||||
|
"os_version": "Linux rolling Arch Linux",
|
||||||
|
"host_name": "Computer1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### `/lighthouse/syncing`
|
### `/lighthouse/syncing`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -6,6 +6,7 @@ HTTP Path | Description |
|
|||||||
| --- | -- |
|
| --- | -- |
|
||||||
[`GET /lighthouse/version`](#get-lighthouseversion) | Get the Lighthouse software version.
|
[`GET /lighthouse/version`](#get-lighthouseversion) | Get the Lighthouse software version.
|
||||||
[`GET /lighthouse/health`](#get-lighthousehealth) | Get information about the host machine.
|
[`GET /lighthouse/health`](#get-lighthousehealth) | Get information about the host machine.
|
||||||
|
[`GET /lighthouse/ui/health`](#get-lighthouseuihealth) | Get information about the host machine, tailored for UI applications.
|
||||||
[`GET /lighthouse/spec`](#get-lighthousespec) | Get the Ethereum proof-of-stake consensus specification used by the validator.
|
[`GET /lighthouse/spec`](#get-lighthousespec) | Get the Ethereum proof-of-stake consensus specification used by the validator.
|
||||||
[`GET /lighthouse/auth`](#get-lighthouseauth) | Get the location of the authorization token.
|
[`GET /lighthouse/auth`](#get-lighthouseauth) | Get the location of the authorization token.
|
||||||
[`GET /lighthouse/validators`](#get-lighthousevalidators) | List all validators.
|
[`GET /lighthouse/validators`](#get-lighthousevalidators) | List all validators.
|
||||||
@ -77,6 +78,45 @@ Returns information regarding the health of the host machine.
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## `GET /lighthouse/ui/health`
|
||||||
|
|
||||||
|
Returns information regarding the health of the host machine.
|
||||||
|
|
||||||
|
### HTTP Specification
|
||||||
|
|
||||||
|
| Property | Specification |
|
||||||
|
|-------------------|--------------------------------------------|
|
||||||
|
| Path | `/lighthouse/ui/health` |
|
||||||
|
| Method | GET |
|
||||||
|
| Required Headers | [`Authorization`](./api-vc-auth-header.md) |
|
||||||
|
| Typical Responses | 200 |
|
||||||
|
|
||||||
|
### Example Response Body
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"total_memory": 16443219968,
|
||||||
|
"free_memory": 1283739648,
|
||||||
|
"used_memory": 5586264064,
|
||||||
|
"sys_loadavg_1": 0.59,
|
||||||
|
"sys_loadavg_5": 1.13,
|
||||||
|
"sys_loadavg_15": 2.41,
|
||||||
|
"cpu_cores": 4,
|
||||||
|
"cpu_threads": 8,
|
||||||
|
"global_cpu_frequency": 3.4,
|
||||||
|
"disk_bytes_total": 502390845440,
|
||||||
|
"disk_bytes_free": 9981386752,
|
||||||
|
"system_uptime": 660706,
|
||||||
|
"app_uptime": 105,
|
||||||
|
"system_name": "Arch Linux",
|
||||||
|
"kernel_version": "5.19.13-arch1-1",
|
||||||
|
"os_version": "Linux rolling Arch Linux",
|
||||||
|
"host_name": "Computer1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## `GET /lighthouse/spec`
|
## `GET /lighthouse/spec`
|
||||||
|
|
||||||
Returns the Ethereum proof-of-stake consensus specification loaded for this validator.
|
Returns the Ethereum proof-of-stake consensus specification loaded for this validator.
|
||||||
|
13
common/system_health/Cargo.toml
Normal file
13
common/system_health/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[package]
|
||||||
|
name = "system_health"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
lighthouse_network = { path = "../../beacon_node/lighthouse_network" }
|
||||||
|
types = { path = "../../consensus/types" }
|
||||||
|
sysinfo = "0.26.5"
|
||||||
|
serde = "1.0.116"
|
||||||
|
serde_derive = "1.0.116"
|
||||||
|
serde_json = "1.0.58"
|
||||||
|
parking_lot = "0.12.0"
|
241
common/system_health/src/lib.rs
Normal file
241
common/system_health/src/lib.rs
Normal file
@ -0,0 +1,241 @@
|
|||||||
|
use lighthouse_network::{types::SyncState, NetworkGlobals};
|
||||||
|
use parking_lot::RwLock;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use sysinfo::{CpuExt, DiskExt, NetworkExt, NetworksExt, System, SystemExt};
|
||||||
|
use types::EthSpec;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct SystemHealth {
|
||||||
|
/// Total memory of the system.
|
||||||
|
pub total_memory: u64,
|
||||||
|
/// Total free memory available to the system.
|
||||||
|
pub free_memory: u64,
|
||||||
|
/// Total used memory.
|
||||||
|
pub used_memory: u64,
|
||||||
|
|
||||||
|
/// System load average over 1 minute.
|
||||||
|
pub sys_loadavg_1: f64,
|
||||||
|
/// System load average over 5 minutes.
|
||||||
|
pub sys_loadavg_5: f64,
|
||||||
|
/// System load average over 15 minutes.
|
||||||
|
pub sys_loadavg_15: f64,
|
||||||
|
|
||||||
|
/// Total cpu cores.
|
||||||
|
pub cpu_cores: usize,
|
||||||
|
/// Total cpu threads.
|
||||||
|
pub cpu_threads: usize,
|
||||||
|
/// The global cpu frequency.
|
||||||
|
pub global_cpu_frequency: f32,
|
||||||
|
|
||||||
|
/// Total capacity of disk.
|
||||||
|
pub disk_bytes_total: u64,
|
||||||
|
/// Free space in disk.
|
||||||
|
pub disk_bytes_free: u64,
|
||||||
|
|
||||||
|
/// System uptime.
|
||||||
|
pub system_uptime: u64,
|
||||||
|
/// Application uptime.
|
||||||
|
pub app_uptime: u64,
|
||||||
|
/// The System name
|
||||||
|
pub system_name: String,
|
||||||
|
/// Kernel version
|
||||||
|
pub kernel_version: String,
|
||||||
|
/// OS version
|
||||||
|
pub os_version: String,
|
||||||
|
/// Hostname
|
||||||
|
pub host_name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// System related health, specific to the UI for the validator client.
|
||||||
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct SystemHealthVC {
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub system_health: SystemHealth,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// System related health, specific to the UI for the Beacon Node.
|
||||||
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct SystemHealthBN {
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub system_health: SystemHealth,
|
||||||
|
/// The name of the network that uses the most traffic.
|
||||||
|
pub network_name: String,
|
||||||
|
/// Total bytes received over the main interface.
|
||||||
|
pub network_bytes_total_received: u64,
|
||||||
|
/// Total bytes sent over the main interface.
|
||||||
|
pub network_bytes_total_transmit: u64,
|
||||||
|
|
||||||
|
/// The current NAT status.
|
||||||
|
pub nat_open: bool,
|
||||||
|
/// The current number of connected peers.
|
||||||
|
pub connected_peers: usize,
|
||||||
|
/// The current syncing state of the consensus node.
|
||||||
|
pub sync_state: SyncState,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Populates the system health.
|
||||||
|
fn observe_system_health(
|
||||||
|
sysinfo: Arc<RwLock<System>>,
|
||||||
|
data_dir: PathBuf,
|
||||||
|
app_uptime: u64,
|
||||||
|
) -> SystemHealth {
|
||||||
|
let sysinfo = sysinfo.read();
|
||||||
|
let loadavg = sysinfo.load_average();
|
||||||
|
|
||||||
|
let cpus = sysinfo.cpus();
|
||||||
|
|
||||||
|
let disks = sysinfo.disks();
|
||||||
|
|
||||||
|
let system_uptime = sysinfo.uptime();
|
||||||
|
|
||||||
|
// Helper functions to extract specific data
|
||||||
|
|
||||||
|
// Find fs associated with the data dir location and report this
|
||||||
|
let (disk_bytes_total, disk_bytes_free) = {
|
||||||
|
// There is no clean way to find this in an OS-agnostic way. We take a simple approach,
|
||||||
|
// which is attempt to match the mount_point to the data_dir. If this cannot be done, we
|
||||||
|
// just fallback to the root fs.
|
||||||
|
|
||||||
|
let mut root_fs_disk = None;
|
||||||
|
let mut other_matching_fs = None;
|
||||||
|
|
||||||
|
for disk in disks.iter() {
|
||||||
|
if disk.mount_point() == Path::new("/")
|
||||||
|
|| disk.mount_point() == Path::new("C:\\")
|
||||||
|
|| disk.mount_point() == Path::new("/System/Volumes/Data")
|
||||||
|
{
|
||||||
|
// Found the usual default root_fs
|
||||||
|
root_fs_disk = Some(disk);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have other file systems, compare these to the data_dir of Lighthouse and
|
||||||
|
// prioritize these.
|
||||||
|
if data_dir
|
||||||
|
.to_str()
|
||||||
|
.map(|path| {
|
||||||
|
if let Some(mount_str) = disk.mount_point().to_str() {
|
||||||
|
path.contains(mount_str)
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap_or(false)
|
||||||
|
{
|
||||||
|
other_matching_fs = Some(disk);
|
||||||
|
break; // Don't bother finding other competing fs.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we found a file system other than the root, report this, otherwise just report the
|
||||||
|
// root fs
|
||||||
|
let fs = other_matching_fs.or(root_fs_disk);
|
||||||
|
|
||||||
|
// If the root fs is not known, just add up the total of all known partitions
|
||||||
|
match fs {
|
||||||
|
Some(fs) => (fs.total_space(), fs.available_space()),
|
||||||
|
None => {
|
||||||
|
// If we can't find a known partition, just add them all up
|
||||||
|
disks.iter().fold((0, 0), |mut current_sizes, disk| {
|
||||||
|
current_sizes.0 += disk.total_space();
|
||||||
|
current_sizes.1 += disk.available_space();
|
||||||
|
current_sizes
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Attempt to get the clock speed from the name of the CPU
|
||||||
|
let cpu_frequency_from_name = cpus.iter().next().and_then(|cpu| {
|
||||||
|
cpu.brand()
|
||||||
|
.split_once("GHz")
|
||||||
|
.and_then(|(result, _)| result.trim().rsplit_once(' '))
|
||||||
|
.and_then(|(_, result)| result.parse::<f32>().ok())
|
||||||
|
});
|
||||||
|
|
||||||
|
let global_cpu_frequency = match cpu_frequency_from_name {
|
||||||
|
Some(freq) => freq,
|
||||||
|
None => {
|
||||||
|
// Get the frequency from average measured frequencies
|
||||||
|
let global_cpu_frequency: f32 =
|
||||||
|
cpus.iter().map(|cpu| cpu.frequency()).sum::<u64>() as f32 / cpus.len() as f32;
|
||||||
|
// Shift to ghz to 1dp
|
||||||
|
(global_cpu_frequency / 100.0).round() / 10.0
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
SystemHealth {
|
||||||
|
total_memory: sysinfo.total_memory(),
|
||||||
|
free_memory: sysinfo.free_memory(),
|
||||||
|
used_memory: sysinfo.used_memory(),
|
||||||
|
sys_loadavg_1: loadavg.one,
|
||||||
|
sys_loadavg_5: loadavg.five,
|
||||||
|
sys_loadavg_15: loadavg.fifteen,
|
||||||
|
cpu_cores: sysinfo.physical_core_count().unwrap_or(0),
|
||||||
|
cpu_threads: cpus.len(),
|
||||||
|
global_cpu_frequency,
|
||||||
|
disk_bytes_total,
|
||||||
|
disk_bytes_free,
|
||||||
|
system_uptime,
|
||||||
|
app_uptime,
|
||||||
|
system_name: sysinfo.name().unwrap_or_else(|| String::from("")),
|
||||||
|
kernel_version: sysinfo.kernel_version().unwrap_or_else(|| "".into()),
|
||||||
|
os_version: sysinfo.long_os_version().unwrap_or_else(|| "".into()),
|
||||||
|
host_name: sysinfo.host_name().unwrap_or_else(|| "".into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Observes the Validator client system health.
|
||||||
|
pub fn observe_system_health_vc(
|
||||||
|
sysinfo: Arc<RwLock<System>>,
|
||||||
|
data_dir: PathBuf,
|
||||||
|
app_uptime: u64,
|
||||||
|
) -> SystemHealthVC {
|
||||||
|
SystemHealthVC {
|
||||||
|
system_health: observe_system_health(sysinfo, data_dir, app_uptime),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Observes the Beacon Node system health.
|
||||||
|
pub fn observe_system_health_bn<TSpec: EthSpec>(
|
||||||
|
sysinfo: Arc<RwLock<System>>,
|
||||||
|
data_dir: PathBuf,
|
||||||
|
app_uptime: u64,
|
||||||
|
network_globals: Arc<NetworkGlobals<TSpec>>,
|
||||||
|
) -> SystemHealthBN {
|
||||||
|
let system_health = observe_system_health(sysinfo.clone(), data_dir, app_uptime);
|
||||||
|
|
||||||
|
// Find the network with the most traffic and assume this is the main network
|
||||||
|
let sysinfo = sysinfo.read();
|
||||||
|
let networks = sysinfo.networks();
|
||||||
|
let (network_name, network_bytes_total_received, network_bytes_total_transmit) = networks
|
||||||
|
.iter()
|
||||||
|
.max_by_key(|(_name, network)| network.total_received())
|
||||||
|
.map(|(name, network)| {
|
||||||
|
(
|
||||||
|
name.clone(),
|
||||||
|
network.total_received(),
|
||||||
|
network.total_transmitted(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|| (String::from("None"), 0, 0));
|
||||||
|
|
||||||
|
// Determine if the NAT is open or not.
|
||||||
|
let nat_open = lighthouse_network::metrics::NAT_OPEN
|
||||||
|
.as_ref()
|
||||||
|
.map(|v| v.get())
|
||||||
|
.unwrap_or(0)
|
||||||
|
!= 0;
|
||||||
|
|
||||||
|
SystemHealthBN {
|
||||||
|
system_health,
|
||||||
|
network_name,
|
||||||
|
network_bytes_total_received,
|
||||||
|
network_bytes_total_transmit,
|
||||||
|
nat_open,
|
||||||
|
connected_peers: network_globals.connected_peers(),
|
||||||
|
sync_state: network_globals.sync_state(),
|
||||||
|
}
|
||||||
|
}
|
@ -98,10 +98,9 @@ fn parse_client_config<E: EthSpec>(
|
|||||||
cli_args: &ArgMatches,
|
cli_args: &ArgMatches,
|
||||||
_env: &Environment<E>,
|
_env: &Environment<E>,
|
||||||
) -> Result<ClientConfig, String> {
|
) -> Result<ClientConfig, String> {
|
||||||
let mut client_config = ClientConfig {
|
let mut client_config = ClientConfig::default();
|
||||||
data_dir: get_data_dir(cli_args),
|
|
||||||
..Default::default()
|
client_config.set_data_dir(get_data_dir(cli_args));
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(freezer_dir) = clap_utils::parse_optional(cli_args, "freezer-dir")? {
|
if let Some(freezer_dir) = clap_utils::parse_optional(cli_args, "freezer-dir")? {
|
||||||
client_config.freezer_db_path = Some(freezer_dir);
|
client_config.freezer_db_path = Some(freezer_dir);
|
||||||
|
@ -56,7 +56,9 @@ impl CommandLineTestExec for CommandLineTest {
|
|||||||
fn datadir_flag() {
|
fn datadir_flag() {
|
||||||
CommandLineTest::new()
|
CommandLineTest::new()
|
||||||
.run_with_zero_port()
|
.run_with_zero_port()
|
||||||
.with_config_and_dir(|config, dir| assert_eq!(config.data_dir, dir.path().join("beacon")));
|
.with_config_and_dir(|config, dir| {
|
||||||
|
assert_eq!(*config.data_dir(), dir.path().join("beacon"))
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -48,7 +48,7 @@ impl<E: EthSpec> LocalBeaconNode<E> {
|
|||||||
.tempdir()
|
.tempdir()
|
||||||
.expect("should create temp directory for client datadir");
|
.expect("should create temp directory for client datadir");
|
||||||
|
|
||||||
client_config.data_dir = datadir.path().into();
|
client_config.set_data_dir(datadir.path().into());
|
||||||
client_config.network.network_dir = PathBuf::from(datadir.path()).join("network");
|
client_config.network.network_dir = PathBuf::from(datadir.path()).join("network");
|
||||||
|
|
||||||
ProductionBeaconNode::new(context, client_config)
|
ProductionBeaconNode::new(context, client_config)
|
||||||
|
@ -59,3 +59,6 @@ task_executor = { path = "../common/task_executor" }
|
|||||||
reqwest = { version = "0.11.0", features = ["json","stream"] }
|
reqwest = { version = "0.11.0", features = ["json","stream"] }
|
||||||
url = "2.2.2"
|
url = "2.2.2"
|
||||||
malloc_utils = { path = "../common/malloc_utils" }
|
malloc_utils = { path = "../common/malloc_utils" }
|
||||||
|
sysinfo = "0.26.5"
|
||||||
|
system_health = { path = "../common/system_health" }
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@ use eth2::lighthouse_vc::{
|
|||||||
types::{self as api_types, GenericResponse, PublicKey, PublicKeyBytes},
|
types::{self as api_types, GenericResponse, PublicKey, PublicKeyBytes},
|
||||||
};
|
};
|
||||||
use lighthouse_version::version_with_platform;
|
use lighthouse_version::version_with_platform;
|
||||||
|
use parking_lot::RwLock;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use slog::{crit, info, warn, Logger};
|
use slog::{crit, info, warn, Logger};
|
||||||
use slot_clock::SlotClock;
|
use slot_clock::SlotClock;
|
||||||
@ -24,6 +25,8 @@ use std::marker::PhantomData;
|
|||||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use sysinfo::{System, SystemExt};
|
||||||
|
use system_health::observe_system_health_vc;
|
||||||
use task_executor::TaskExecutor;
|
use task_executor::TaskExecutor;
|
||||||
use types::{ChainSpec, ConfigAndPreset, EthSpec};
|
use types::{ChainSpec, ConfigAndPreset, EthSpec};
|
||||||
use validator_dir::Builder as ValidatorDirBuilder;
|
use validator_dir::Builder as ValidatorDirBuilder;
|
||||||
@ -183,6 +186,35 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
|
|||||||
let api_token_path_inner = api_token_path.clone();
|
let api_token_path_inner = api_token_path.clone();
|
||||||
let api_token_path_filter = warp::any().map(move || api_token_path_inner.clone());
|
let api_token_path_filter = warp::any().map(move || api_token_path_inner.clone());
|
||||||
|
|
||||||
|
// Create a `warp` filter that provides access to local system information.
|
||||||
|
let system_info = Arc::new(RwLock::new(sysinfo::System::new()));
|
||||||
|
{
|
||||||
|
// grab write access for initialisation
|
||||||
|
let mut system_info = system_info.write();
|
||||||
|
system_info.refresh_disks_list();
|
||||||
|
system_info.refresh_networks_list();
|
||||||
|
} // end lock
|
||||||
|
|
||||||
|
let system_info_filter =
|
||||||
|
warp::any()
|
||||||
|
.map(move || system_info.clone())
|
||||||
|
.map(|sysinfo: Arc<RwLock<System>>| {
|
||||||
|
{
|
||||||
|
// refresh stats
|
||||||
|
let mut sysinfo_lock = sysinfo.write();
|
||||||
|
sysinfo_lock.refresh_memory();
|
||||||
|
sysinfo_lock.refresh_cpu_specifics(sysinfo::CpuRefreshKind::everything());
|
||||||
|
sysinfo_lock.refresh_cpu();
|
||||||
|
sysinfo_lock.refresh_system();
|
||||||
|
sysinfo_lock.refresh_networks();
|
||||||
|
sysinfo_lock.refresh_disks();
|
||||||
|
} // end lock
|
||||||
|
sysinfo
|
||||||
|
});
|
||||||
|
|
||||||
|
let app_start = std::time::Instant::now();
|
||||||
|
let app_start_filter = warp::any().map(move || app_start);
|
||||||
|
|
||||||
// GET lighthouse/version
|
// GET lighthouse/version
|
||||||
let get_node_version = warp::path("lighthouse")
|
let get_node_version = warp::path("lighthouse")
|
||||||
.and(warp::path("version"))
|
.and(warp::path("version"))
|
||||||
@ -279,6 +311,24 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// GET lighthouse/ui/health
|
||||||
|
let get_lighthouse_ui_health = warp::path("lighthouse")
|
||||||
|
.and(warp::path("ui"))
|
||||||
|
.and(warp::path("health"))
|
||||||
|
.and(warp::path::end())
|
||||||
|
.and(system_info_filter)
|
||||||
|
.and(app_start_filter)
|
||||||
|
.and(validator_dir_filter.clone())
|
||||||
|
.and(signer.clone())
|
||||||
|
.and_then(|sysinfo, app_start: std::time::Instant, val_dir, signer| {
|
||||||
|
blocking_signed_json_task(signer, move || {
|
||||||
|
let app_uptime = app_start.elapsed().as_secs() as u64;
|
||||||
|
Ok(api_types::GenericResponse::from(observe_system_health_vc(
|
||||||
|
sysinfo, val_dir, app_uptime,
|
||||||
|
)))
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
// POST lighthouse/validators/
|
// POST lighthouse/validators/
|
||||||
let post_validators = warp::path("lighthouse")
|
let post_validators = warp::path("lighthouse")
|
||||||
.and(warp::path("validators"))
|
.and(warp::path("validators"))
|
||||||
@ -894,6 +944,7 @@ pub fn serve<T: 'static + SlotClock + Clone, E: EthSpec>(
|
|||||||
.or(get_lighthouse_spec)
|
.or(get_lighthouse_spec)
|
||||||
.or(get_lighthouse_validators)
|
.or(get_lighthouse_validators)
|
||||||
.or(get_lighthouse_validators_pubkey)
|
.or(get_lighthouse_validators_pubkey)
|
||||||
|
.or(get_lighthouse_ui_health)
|
||||||
.or(get_fee_recipient)
|
.or(get_fee_recipient)
|
||||||
.or(get_gas_limit)
|
.or(get_gas_limit)
|
||||||
.or(get_std_keystores)
|
.or(get_std_keystores)
|
||||||
|
Loading…
Reference in New Issue
Block a user