Improve beacon node logging (#660)

* Squish prev commits into a single atop master

* Enable http and ws during testing

* Checkout clobbered files from master

* Further un-clobber rest_api changes

* Tidy formatting

* Do rust-fmt's job for it :|
This commit is contained in:
Paul Hauner 2019-12-06 18:44:38 +11:00 committed by GitHub
parent 75efed305c
commit 779873680b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 357 additions and 181 deletions

View File

@ -837,11 +837,29 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
//
// This is likely overly restrictive, we could store the attestation for later
// processing.
warn!(
self.log,
"Dropped attestation for unknown block";
"block" => format!("{}", attestation.data.beacon_block_root)
);
let head_epoch = self
.head()
.beacon_block
.slot
.epoch(T::EthSpec::slots_per_epoch());
let attestation_epoch = attestation.data.slot.epoch(T::EthSpec::slots_per_epoch());
// Only log a warning if our head is in a reasonable place to verify this attestation.
// This avoids excess logging during syncing.
if head_epoch + 1 >= attestation_epoch {
warn!(
self.log,
"Dropped attestation for unknown block";
"block" => format!("{}", attestation.data.beacon_block_root)
);
} else {
debug!(
self.log,
"Dropped attestation for unknown block";
"block" => format!("{}", attestation.data.beacon_block_root)
);
}
Ok(AttestationProcessingOutcome::UnknownHeadBlock {
beacon_block_root: attestation.data.beacon_block_root,
})
@ -1453,12 +1471,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
"new_slot" => new_slot
);
} else {
info!(
self.log,
"New head beacon block";
"root" => format!("{}", beacon_block_root),
"slot" => new_slot,
);
debug!(
self.log,
"Head beacon block";
@ -1466,6 +1478,8 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
"justified_epoch" => beacon_state.current_justified_checkpoint.epoch,
"finalized_root" => format!("{}", beacon_state.finalized_checkpoint.root),
"finalized_epoch" => beacon_state.finalized_checkpoint.epoch,
"root" => format!("{}", beacon_block_root),
"slot" => new_slot,
);
};

View File

@ -1,4 +1,5 @@
use crate::config::{ClientGenesis, Config as ClientConfig};
use crate::notifier::spawn_notifier;
use crate::Client;
use beacon_chain::{
builder::{BeaconChainBuilder, Witness},
@ -15,28 +16,23 @@ use environment::RuntimeContext;
use eth1::{Config as Eth1Config, Service as Eth1Service};
use eth2_config::Eth2Config;
use exit_future::Signal;
use futures::{future, Future, IntoFuture, Stream};
use futures::{future, Future, IntoFuture};
use genesis::{
generate_deterministic_keypairs, interop_genesis_state, state_from_ssz_file, Eth1GenesisService,
};
use lighthouse_bootstrap::Bootstrapper;
use lmd_ghost::LmdGhost;
use network::{NetworkConfig, NetworkMessage, Service as NetworkService};
use slog::{debug, error, info, warn};
use slog::info;
use ssz::Decode;
use std::net::SocketAddr;
use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::time::Duration;
use tokio::sync::mpsc::UnboundedSender;
use tokio::timer::Interval;
use types::{BeaconState, ChainSpec, EthSpec};
use websocket_server::{Config as WebSocketConfig, WebSocketSender};
/// The interval between notifier events.
pub const NOTIFIER_INTERVAL_SECONDS: u64 = 15;
/// Create a warning log whenever the peer count is at or below this value.
pub const WARN_PEER_COUNT: usize = 1;
/// Interval between polling the eth1 node for genesis information.
pub const ETH1_GENESIS_UPDATE_INTERVAL_MILLIS: u64 = 7_000;
@ -330,105 +326,32 @@ where
Ok(self)
}
/// Immediately starts the service that periodically logs about the libp2p peer count.
pub fn peer_count_notifier(mut self) -> Result<Self, String> {
let context = self
.runtime_context
.as_ref()
.ok_or_else(|| "peer_count_notifier requires a runtime_context")?
.service_context("peer_notifier".into());
let log = context.log.clone();
let log_2 = context.log.clone();
let network = self
.libp2p_network
.clone()
.ok_or_else(|| "peer_notifier requires a libp2p network")?;
let (exit_signal, exit) = exit_future::signal();
self.exit_signals.push(exit_signal);
let interval_future = Interval::new(
Instant::now(),
Duration::from_secs(NOTIFIER_INTERVAL_SECONDS),
)
.map_err(move |e| error!(log_2, "Notifier timer failed"; "error" => format!("{:?}", e)))
.for_each(move |_| {
// NOTE: Panics if libp2p is poisoned.
let connected_peer_count = network.libp2p_service().lock().swarm.connected_peers();
debug!(log, "Connected peer status"; "peer_count" => connected_peer_count);
if connected_peer_count <= WARN_PEER_COUNT {
warn!(log, "Low peer count"; "peer_count" => connected_peer_count);
}
Ok(())
});
context
.executor
.spawn(exit.until(interval_future).map(|_| ()));
Ok(self)
}
/// Immediately starts the service that periodically logs information each slot.
pub fn slot_notifier(mut self) -> Result<Self, String> {
pub fn notifier(mut self) -> Result<Self, String> {
let context = self
.runtime_context
.as_ref()
.ok_or_else(|| "slot_notifier requires a runtime_context")?
.service_context("slot_notifier".into());
let log = context.log.clone();
let log_2 = log.clone();
let beacon_chain = self
.beacon_chain
.clone()
.ok_or_else(|| "slot_notifier requires a libp2p network")?;
let spec = self
.chain_spec
.ok_or_else(|| "slot_notifier requires a beacon chain")?;
let network = self
.libp2p_network
.clone()
.ok_or_else(|| "slot_notifier requires a chain spec".to_string())?;
let slot_duration = Duration::from_millis(spec.milliseconds_per_slot);
let duration_to_next_slot = beacon_chain
.slot_clock
.duration_to_next_slot()
.ok_or_else(|| "slot_notifier unable to determine time to next slot")?;
.ok_or_else(|| "slot_notifier requires a libp2p network")?;
let milliseconds_per_slot = self
.chain_spec
.as_ref()
.ok_or_else(|| "slot_notifier requires a chain spec".to_string())?
.milliseconds_per_slot;
let (exit_signal, exit) = exit_future::signal();
let exit_signal = spawn_notifier(context, beacon_chain, network, milliseconds_per_slot)
.map_err(|e| format!("Unable to start slot notifier: {}", e))?;
self.exit_signals.push(exit_signal);
let interval_future = Interval::new(Instant::now() + duration_to_next_slot, slot_duration)
.map_err(move |e| error!(log_2, "Slot timer failed"; "error" => format!("{:?}", e)))
.for_each(move |_| {
let best_slot = beacon_chain.head().beacon_block.slot;
let latest_block_root = beacon_chain.head().beacon_block_root;
if let Ok(current_slot) = beacon_chain.slot() {
info!(
log,
"Slot start";
"skip_slots" => current_slot.saturating_sub(best_slot),
"best_block_root" => format!("{}", latest_block_root),
"best_block_slot" => best_slot,
"slot" => current_slot,
)
} else {
error!(
log,
"Beacon chain running whilst slot clock is unavailable."
);
};
Ok(())
});
context
.executor
.spawn(exit.until(interval_future).map(|_| ()));
Ok(self)
}

View File

@ -1,6 +1,7 @@
extern crate slog;
mod config;
mod notifier;
pub mod builder;
pub mod error;

View File

@ -0,0 +1,195 @@
use beacon_chain::{BeaconChain, BeaconChainTypes};
use environment::RuntimeContext;
use exit_future::Signal;
use futures::{Future, Stream};
use network::Service as NetworkService;
use parking_lot::Mutex;
use slog::{debug, error, info, warn};
use slot_clock::SlotClock;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::timer::Interval;
use types::{EthSpec, Slot};
/// Create a warning log whenever the peer count is at or below this value.
pub const WARN_PEER_COUNT: usize = 1;
const SECS_PER_MINUTE: u64 = 60;
const SECS_PER_HOUR: u64 = 3600;
const SECS_PER_DAY: u64 = 86400; // non-leap
const SECS_PER_WEEK: u64 = 604800; // non-leap
const DAYS_PER_WEEK: u64 = 7;
const HOURS_PER_DAY: u64 = 24;
const MINUTES_PER_HOUR: u64 = 60;
/// Spawns a notifier service which periodically logs information about the node.
pub fn spawn_notifier<T: BeaconChainTypes>(
context: RuntimeContext<T::EthSpec>,
beacon_chain: Arc<BeaconChain<T>>,
network: Arc<NetworkService<T>>,
milliseconds_per_slot: u64,
) -> Result<Signal, String> {
let log_1 = context.log.clone();
let log_2 = context.log.clone();
let slot_duration = Duration::from_millis(milliseconds_per_slot);
let duration_to_next_slot = beacon_chain
.slot_clock
.duration_to_next_slot()
.ok_or_else(|| "slot_notifier unable to determine time to next slot")?;
// Run this half way through each slot.
let start_instant = Instant::now() + duration_to_next_slot + (slot_duration / 2);
// Run this each slot.
let interval_duration = slot_duration;
let previous_head_slot = Mutex::new(Slot::new(0));
let interval_future = Interval::new(start_instant, interval_duration)
.map_err(
move |e| error!(log_1, "Slot notifier timer failed"; "error" => format!("{:?}", e)),
)
.for_each(move |_| {
let log = log_2.clone();
let connected_peer_count = network.libp2p_service().lock().swarm.connected_peers();
let head = beacon_chain.head();
let head_slot = head.beacon_block.slot;
let head_epoch = head_slot.epoch(T::EthSpec::slots_per_epoch());
let current_slot = beacon_chain.slot().map_err(|e| {
error!(
log,
"Unable to read current slot";
"error" => format!("{:?}", e)
)
})?;
let current_epoch = current_slot.epoch(T::EthSpec::slots_per_epoch());
let finalized_epoch = head.beacon_state.finalized_checkpoint.epoch;
let finalized_root = head.beacon_state.finalized_checkpoint.root;
let head_root = head.beacon_block_root;
let mut previous_head_slot = previous_head_slot.lock();
// The next two lines take advantage of saturating subtraction on `Slot`.
let head_distance = current_slot - head_slot;
let slots_since_last_update = head_slot - *previous_head_slot;
*previous_head_slot = head_slot;
if connected_peer_count <= WARN_PEER_COUNT {
warn!(log, "Low peer count"; "peer_count" => connected_peer_count);
}
debug!(
log,
"Slot timer";
"peers" => connected_peer_count,
"finalized_root" => format!("{}", finalized_root),
"finalized_epoch" => finalized_epoch,
"head_block" => format!("{}", head_root),
"head_slot" => head_slot,
"current_slot" => current_slot,
);
if head_epoch + 1 < current_epoch {
let distance = format!(
"{} slots ({})",
head_distance.as_u64(),
slot_distance_pretty(head_distance, slot_duration)
);
info!(
log,
"Syncing";
"peers" => connected_peer_count,
"speed" => sync_rate_pretty(slots_since_last_update, interval_duration.as_secs()),
"distance" => distance
);
return Ok(());
};
macro_rules! not_quite_synced_log {
($message: expr) => {
info!(
log_2,
$message;
"peers" => connected_peer_count,
"finalized_root" => format!("{}", finalized_root),
"finalized_epoch" => finalized_epoch,
"head_slot" => head_slot,
"current_slot" => current_slot,
);
}
}
if head_epoch + 1 == current_epoch {
not_quite_synced_log!("Synced to previous epoch")
} else if head_slot != current_slot {
not_quite_synced_log!("Synced to current epoch")
} else {
info!(
log_2,
"Synced";
"peers" => connected_peer_count,
"finalized_root" => format!("{}", finalized_root),
"finalized_epoch" => finalized_epoch,
"epoch" => current_epoch,
"slot" => current_slot,
);
};
Ok(())
});
let (exit_signal, exit) = exit_future::signal();
context
.executor
.spawn(exit.until(interval_future).map(|_| ()));
Ok(exit_signal)
}
/// Returns a nicely formated string describing the rate of slot imports per second.
fn sync_rate_pretty(slots_since_last_update: Slot, update_interval_secs: u64) -> String {
if update_interval_secs == 0 {
return "Error".into();
}
if slots_since_last_update == 0 {
"No progress".into()
} else {
format!(
"{} slots/sec",
slots_since_last_update / update_interval_secs
)
}
}
/// Returns a nicely formatted string describing the `slot_span` in terms of weeks, days, hours
/// and/or minutes.
fn slot_distance_pretty(slot_span: Slot, slot_duration: Duration) -> String {
if slot_duration == Duration::from_secs(0) {
return String::from("Unknown");
}
let secs = (slot_duration * slot_span.as_u64() as u32).as_secs();
let weeks = secs / SECS_PER_WEEK;
let days = secs / SECS_PER_DAY;
let hours = secs / SECS_PER_HOUR;
let minutes = secs / SECS_PER_MINUTE;
if weeks > 0 {
format!("{} weeks {} days", weeks, days % DAYS_PER_WEEK)
} else if days > 0 {
format!("{} days {} hrs", days, hours % HOURS_PER_DAY)
} else if hours > 0 {
format!("{} hrs {} mins", hours, minutes % MINUTES_PER_HOUR)
} else {
format!("{} mins", minutes)
}
}

View File

@ -43,7 +43,7 @@ pub struct Config {
impl Default for Config {
fn default() -> Self {
Config {
enabled: true,
enabled: false,
listen_address: Ipv4Addr::new(127, 0, 0, 1),
port: 5052,
}

View File

@ -97,7 +97,7 @@ pub fn start_server<T: BeaconChainTypes>(
let (exit_signal, exit) = exit_future::signal();
let inner_log = log.clone();
let server_exit = exit.and_then(move |_| {
info!(inner_log, "API service shutdown");
info!(inner_log, "HTTP service shutdown");
Ok(())
});
// Configure the `hyper` server to gracefully shutdown when the shutdown channel is triggered.
@ -107,13 +107,13 @@ pub fn start_server<T: BeaconChainTypes>(
.map_err(move |e| {
warn!(
inner_log,
"API failed to start, Unable to bind"; "address" => format!("{:?}", e)
"HTTP server failed to start, Unable to bind"; "address" => format!("{:?}", e)
)
});
info!(
log,
"REST API started";
"HTTP API started";
"address" => format!("{}", actual_listen_addr.ip()),
"port" => actual_listen_addr.port(),
);

View File

@ -105,7 +105,7 @@ pub fn route<T: BeaconChainTypes>(
validator::post_validator_duties::<T>(req, beacon_chain)
}
(&Method::GET, "/validator/block") => {
into_boxfut(validator::get_new_beacon_block::<T>(req, beacon_chain))
into_boxfut(validator::get_new_beacon_block::<T>(req, beacon_chain, log))
}
(&Method::POST, "/validator/block") => {
validator::publish_beacon_block::<T>(req, beacon_chain, network_channel, log)

View File

@ -12,7 +12,7 @@ use futures::future::Future;
use futures::stream::Stream;
use hyper::{Body, Request};
use serde::{Deserialize, Serialize};
use slog::{info, warn, Logger};
use slog::{error, info, warn, Logger};
use ssz_derive::{Decode, Encode};
use std::sync::Arc;
use types::beacon_state::EthSpec;
@ -195,6 +195,7 @@ fn return_validator_duties<T: BeaconChainTypes>(
pub fn get_new_beacon_block<T: BeaconChainTypes>(
req: Request<Body>,
beacon_chain: Arc<BeaconChain<T>>,
log: Logger,
) -> ApiResult {
let query = UrlQuery::from_request(&req)?;
@ -204,6 +205,12 @@ pub fn get_new_beacon_block<T: BeaconChainTypes>(
let (new_block, _state) = beacon_chain
.produce_block(randao_reveal, slot)
.map_err(|e| {
error!(
log,
"Error whilst producing block";
"error" => format!("{:?}", e)
);
ApiError::ServerError(format!(
"Beacon node is not able to produce a block: {:?}",
e
@ -224,38 +231,56 @@ pub fn publish_beacon_block<T: BeaconChainTypes>(
let response_builder = ResponseBuilder::new(&req);
let body = req.into_body();
Box::new(body
.concat2()
.map_err(|e| ApiError::ServerError(format!("Unable to get request body: {:?}",e)))
.and_then(|chunks| {
serde_json::from_slice(&chunks).map_err(|e| ApiError::BadRequest(format!("Unable to parse JSON into BeaconBlock: {:?}",e)))
})
.and_then(move |block: BeaconBlock<T::EthSpec>| {
let slot = block.slot;
match beacon_chain.process_block(block.clone()) {
Ok(BlockProcessingOutcome::Processed { block_root }) => {
// Block was processed, publish via gossipsub
info!(log, "Processed valid block from API, transmitting to network."; "block_slot" => slot, "block_root" => format!("{}", block_root));
publish_beacon_block_to_network::<T>(network_chan, block)
Box::new(
body.concat2()
.map_err(|e| ApiError::ServerError(format!("Unable to get request body: {:?}", e)))
.and_then(|chunks| {
serde_json::from_slice(&chunks).map_err(|e| {
ApiError::BadRequest(format!("Unable to parse JSON into BeaconBlock: {:?}", e))
})
})
.and_then(move |block: BeaconBlock<T::EthSpec>| {
let slot = block.slot;
match beacon_chain.process_block(block.clone()) {
Ok(BlockProcessingOutcome::Processed { block_root }) => {
// Block was processed, publish via gossipsub
info!(
log,
"Block from local validator";
"block_root" => format!("{}", block_root),
"block_slot" => slot,
);
publish_beacon_block_to_network::<T>(network_chan, block)
}
Ok(outcome) => {
warn!(
log,
"Invalid block from local validator";
"outcome" => format!("{:?}", outcome)
);
Err(ApiError::ProcessingError(format!(
"The BeaconBlock could not be processed and has not been published: {:?}",
outcome
)))
}
Err(e) => {
error!(
log,
"Error whilst processing block";
"error" => format!("{:?}", e)
);
Err(ApiError::ServerError(format!(
"Error while processing block: {:?}",
e
)))
}
}
Ok(outcome) => {
warn!(log, "BeaconBlock could not be processed, but is being sent to the network anyway."; "outcome" => format!("{:?}", outcome));
publish_beacon_block_to_network::<T>(network_chan, block)?;
Err(ApiError::ProcessingError(format!(
"The BeaconBlock could not be processed, but has still been published: {:?}",
outcome
)))
}
Err(e) => {
Err(ApiError::ServerError(format!(
"Error while processing block: {:?}",
e
)))
}
}
}).and_then(|_| {
response_builder?.body_no_ssz(&())
}))
})
.and_then(|_| response_builder?.body_no_ssz(&())),
)
}
/// HTTP Handler to produce a new Attestation from the current state, ready to be signed by a validator.
@ -285,42 +310,59 @@ pub fn publish_attestation<T: BeaconChainTypes>(
try_future!(check_content_type_for_json(&req));
let response_builder = ResponseBuilder::new(&req);
Box::new(req
.into_body()
.concat2()
.map_err(|e| ApiError::ServerError(format!("Unable to get request body: {:?}",e)))
.map(|chunk| chunk.iter().cloned().collect::<Vec<u8>>())
.and_then(|chunks| {
serde_json::from_slice(&chunks.as_slice()).map_err(|e| {
ApiError::BadRequest(format!(
"Unable to deserialize JSON into a BeaconBlock: {:?}",
e
))
})
})
.and_then(move |attestation: Attestation<T::EthSpec>| {
match beacon_chain.process_attestation(attestation.clone()) {
Ok(AttestationProcessingOutcome::Processed) => {
// Block was processed, publish via gossipsub
info!(log, "Processed valid attestation from API, transmitting to network.");
publish_attestation_to_network::<T>(network_chan, attestation)
}
Ok(outcome) => {
warn!(log, "Attestation could not be processed, but is being sent to the network anyway."; "outcome" => format!("{:?}", outcome));
publish_attestation_to_network::<T>(network_chan, attestation)?;
Err(ApiError::ProcessingError(format!(
"The Attestation could not be processed, but has still been published: {:?}",
outcome
)))
}
Err(e) => {
Err(ApiError::ServerError(format!(
"Error while processing attestation: {:?}",
Box::new(
req.into_body()
.concat2()
.map_err(|e| ApiError::ServerError(format!("Unable to get request body: {:?}", e)))
.map(|chunk| chunk.iter().cloned().collect::<Vec<u8>>())
.and_then(|chunks| {
serde_json::from_slice(&chunks.as_slice()).map_err(|e| {
ApiError::BadRequest(format!(
"Unable to deserialize JSON into a BeaconBlock: {:?}",
e
)))
))
})
})
.and_then(move |attestation: Attestation<T::EthSpec>| {
match beacon_chain.process_attestation(attestation.clone()) {
Ok(AttestationProcessingOutcome::Processed) => {
// Block was processed, publish via gossipsub
info!(
log,
"Attestation from local validator";
"target" => attestation.data.source.epoch,
"source" => attestation.data.source.epoch,
"index" => attestation.data.index,
"slot" => attestation.data.slot,
);
publish_attestation_to_network::<T>(network_chan, attestation)
}
Ok(outcome) => {
warn!(
log,
"Invalid attestation from local validator";
"outcome" => format!("{:?}", outcome)
);
Err(ApiError::ProcessingError(format!(
"The Attestation could not be processed and has not been published: {:?}",
outcome
)))
}
Err(e) => {
error!(
log,
"Error whilst processing attestation";
"error" => format!("{:?}", e)
);
Err(ApiError::ServerError(format!(
"Error while processing attestation: {:?}",
e
)))
}
}
}
}).and_then(|_| {
response_builder?.body_no_ssz(&())
}))
})
.and_then(|_| response_builder?.body_no_ssz(&())),
)
}

View File

@ -131,7 +131,8 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
.system_time_slot_clock()?
.websocket_event_handler(client_config.websocket_server.clone())?
.build_beacon_chain()?
.libp2p_network(&client_config.network)?;
.libp2p_network(&client_config.network)?
.notifier()?;
let builder = if client_config.rest_api.enabled {
builder.http_server(&client_config, &http_eth2_config)?
@ -139,8 +140,6 @@ impl<E: EthSpec> ProductionBeaconNode<E> {
builder
};
let builder = builder.peer_count_notifier()?.slot_notifier()?;
Ok(Self(builder.build()))
})
}

View File

@ -14,7 +14,7 @@ pub struct Config {
impl Default for Config {
fn default() -> Self {
Config {
enabled: true,
enabled: false,
listen_address: Ipv4Addr::new(127, 0, 0, 1),
port: 5053,
}

View File

@ -69,7 +69,9 @@ pub fn testing_client_config() -> ClientConfig {
// Setting ports to `0` means that the OS will choose some available port.
client_config.network.libp2p_port = 0;
client_config.network.discovery_port = 0;
client_config.rest_api.enabled = true;
client_config.rest_api.port = 0;
client_config.websocket_server.enabled = true;
client_config.websocket_server.port = 0;
client_config.dummy_eth1_backend = true;