Critical dependency logging (#4988)

* add metrics layer

* add metrics

* simplify getting the target

* make clippy happy

* fix typos

* unify deps under workspace

* make import statement shorter, fix typos

* enable warn by default, mark flag as deprecated

* do not exit on error when initializing logging fails

* revert exit on error

* adjust bootnode logging

* add logging layer

* non blocking file writer

* non blocking file writer

* add tracing visitor

* use target as is by default

* make libp2p events register correctly

* adjust repilcated cli help

* refactor tracing layer

* linting

* filesize

* log gossipsub, dont filter by log level

* turn on debug logs by default, remove deprecation warning

* truncate file, add timestamp, add unit test

* suppress output (#5)

* use tracing appender

* cleanup

* Add a task to remove old log files and upgrade to warn level

* Add the time feature for tokio

* Udeps and fmt

---------

Co-authored-by: Diva M <divma@protonmail.com>
Co-authored-by: Divma <26765164+divagant-martian@users.noreply.github.com>
Co-authored-by: Age Manning <Age@AgeManning.com>
This commit is contained in:
Eitan Seri-Levi 2024-01-16 10:44:26 +02:00 committed by GitHub
parent a68b701807
commit f22e5b0f85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 238 additions and 35 deletions

48
Cargo.lock generated
View File

@ -4793,6 +4793,8 @@ dependencies = [
"sloggers",
"take_mut",
"tokio",
"tracing",
"tracing-appender",
"tracing-core",
"tracing-log",
"tracing-subscriber",
@ -4857,6 +4859,15 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "matches"
version = "0.1.10"
@ -6406,8 +6417,17 @@ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
@ -6418,9 +6438,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@ -8190,6 +8216,18 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "tracing-appender"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf"
dependencies = [
"crossbeam-channel",
"thiserror",
"time",
"tracing-subscriber",
]
[[package]]
name = "tracing-attributes"
version = "0.1.27"
@ -8238,10 +8276,14 @@ version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]

View File

@ -160,9 +160,10 @@ tempfile = "3"
tokio = { version = "1", features = ["rt-multi-thread", "sync", "signal"] }
tokio-stream = { version = "0.1", features = ["sync"] }
tokio-util = { version = "0.6", features = ["codec", "compat", "time"] }
tracing-appender = "0.2"
tracing-core = "0.1"
tracing-log = "0.2"
tracing-subscriber = "0.3"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tree_hash = "0.5"
tree_hash_derive = "0.5"
url = "2"

View File

@ -10,6 +10,7 @@ pub const DEFAULT_NETWORK_DIR: &str = "network";
pub const DEFAULT_VALIDATOR_DIR: &str = "validators";
pub const DEFAULT_SECRET_DIR: &str = "secrets";
pub const DEFAULT_WALLET_DIR: &str = "wallets";
pub const DEFAULT_TRACING_DIR: &str = "tracing";
/// Base directory name for unnamed testnets passed through the --testnet-dir flag
pub const CUSTOM_TESTNET_DIR: &str = "custom";

View File

@ -19,7 +19,9 @@ slog-async = { workspace = true }
slog-term = { workspace = true }
sloggers = { workspace = true }
take_mut = "0.2.2"
tokio = { workspace = true }
tokio = { workspace = true, features = [ "time" ] }
tracing = "0.1"
tracing-core = { workspace = true }
tracing-log = { workspace = true }
tracing-subscriber = { workspace = true }
tracing-appender = { workspace = true }

View File

@ -7,15 +7,21 @@ use lighthouse_metrics::{
use slog::Logger;
use slog_term::Decorator;
use std::io::{Result, Write};
use std::path::PathBuf;
use std::time::{Duration, Instant};
use tracing_appender::non_blocking::NonBlocking;
use tracing_logging_layer::LoggingLayer;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
pub const MAX_MESSAGE_WIDTH: usize = 40;
pub mod async_record;
mod sse_logging_components;
mod tracing_logging_layer;
mod tracing_metrics_layer;
pub use sse_logging_components::SSELoggingComponents;
pub use tracing_logging_layer::cleanup_logging_task;
pub use tracing_metrics_layer::MetricsLayer;
/// The minimum interval between log messages indicating that a queue is full.
@ -217,6 +223,48 @@ impl TimeLatch {
}
}
pub fn create_tracing_layer(base_tracing_log_path: PathBuf, turn_on_terminal_logs: bool) {
let filter_layer = match tracing_subscriber::EnvFilter::try_from_default_env()
.or_else(|_| tracing_subscriber::EnvFilter::try_new("warn"))
{
Ok(filter) => filter,
Err(e) => {
eprintln!("Failed to initialize dependency logging {e}");
return;
}
};
let libp2p_writer =
tracing_appender::rolling::daily(base_tracing_log_path.clone(), "libp2p.log");
let discv5_writer =
tracing_appender::rolling::daily(base_tracing_log_path.clone(), "discv5.log");
let (libp2p_non_blocking_writer, libp2p_guard) = NonBlocking::new(libp2p_writer);
let (discv5_non_blocking_writer, discv5_guard) = NonBlocking::new(discv5_writer);
let custom_layer = LoggingLayer {
libp2p_non_blocking_writer,
libp2p_guard,
discv5_non_blocking_writer,
discv5_guard,
};
if let Err(e) = tracing_subscriber::fmt()
.with_env_filter(filter_layer)
.with_writer(move || {
tracing_subscriber::fmt::writer::OptionalWriter::<std::io::Stdout>::from(
turn_on_terminal_logs.then(std::io::stdout),
)
})
.finish()
.with(MetricsLayer)
.with(custom_layer)
.try_init()
{
eprintln!("Failed to initialize dependency logging {e}");
}
}
/// Return a logger suitable for test usage.
///
/// By default no logs will be printed, but they can be enabled via

View File

@ -0,0 +1,115 @@
use chrono::{naive::Days, prelude::*};
use slog::{debug, warn};
use std::io::Write;
use tracing::Subscriber;
use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
use tracing_subscriber::layer::Context;
use tracing_subscriber::Layer;
pub struct LoggingLayer {
pub libp2p_non_blocking_writer: NonBlocking,
pub libp2p_guard: WorkerGuard,
pub discv5_non_blocking_writer: NonBlocking,
pub discv5_guard: WorkerGuard,
}
impl<S> Layer<S> for LoggingLayer
where
S: Subscriber,
{
fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<S>) {
let meta = event.metadata();
let log_level = meta.level();
let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
let target = match meta.target().split_once("::") {
Some((crate_name, _)) => crate_name,
None => "unknown",
};
let mut writer = match target {
"libp2p_gossipsub" => self.libp2p_non_blocking_writer.clone(),
"discv5" => self.discv5_non_blocking_writer.clone(),
_ => return,
};
let mut visitor = LogMessageExtractor {
message: String::default(),
};
event.record(&mut visitor);
let message = format!("{} {} {}\n", timestamp, log_level, visitor.message);
if let Err(e) = writer.write_all(message.as_bytes()) {
eprintln!("Failed to write log: {}", e);
}
}
}
struct LogMessageExtractor {
message: String,
}
impl tracing_core::field::Visit for LogMessageExtractor {
fn record_debug(&mut self, _: &tracing_core::Field, value: &dyn std::fmt::Debug) {
self.message = format!("{} {:?}", self.message, value);
}
}
/// Creates a long lived async task that routinely deletes old tracing log files
pub async fn cleanup_logging_task(path: std::path::PathBuf, log: slog::Logger) {
loop {
// Delay for 1 day and then prune old logs
tokio::time::sleep(std::time::Duration::from_secs(60 * 60 * 24)).await;
let Some(yesterday_date) = chrono::prelude::Local::now()
.naive_local()
.checked_sub_days(Days::new(1))
else {
warn!(log, "Could not calculate the current date");
return;
};
// Search for old log files
let dir = path.as_path();
if dir.is_dir() {
let Ok(files) = std::fs::read_dir(dir) else {
warn!(log, "Could not read log directory contents"; "path" => ?dir);
break;
};
for file in files {
let Ok(dir_entry) = file else {
warn!(log, "Could not read file");
continue;
};
let Ok(file_name) = dir_entry.file_name().into_string() else {
warn!(log, "Could not read file"; "file" => ?dir_entry);
continue;
};
if file_name.starts_with("libp2p.log") | file_name.starts_with("discv5.log") {
let log_file_date = file_name.split('.').collect::<Vec<_>>();
if log_file_date.len() == 3 {
let Ok(log_file_date_type) =
NaiveDate::parse_from_str(log_file_date[2], "%Y-%m-%d")
else {
warn!(log, "Could not parse log file date"; "file" => file_name);
continue;
};
if log_file_date_type < yesterday_date.into() {
// Delete the file, its too old
debug!(log, "Removing old log file"; "file" => &file_name);
if let Err(e) = std::fs::remove_file(dir_entry.path()) {
warn!(log, "Failed to remove log file"; "file" => file_name, "error" => %e);
}
}
}
}
}
}
}
}

View File

@ -14,7 +14,6 @@ use slog::{crit, info};
use std::path::PathBuf;
use std::process::exit;
use task_executor::ShutdownReason;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use types::{EthSpec, EthSpecId};
use validator_client::ProductionValidatorClient;
@ -365,34 +364,6 @@ fn main() {
}
}
// read the `RUST_LOG` statement
let filter_layer = match tracing_subscriber::EnvFilter::try_from_default_env()
.or_else(|_| tracing_subscriber::EnvFilter::try_new("debug"))
{
Ok(filter) => filter,
Err(e) => {
eprintln!("Failed to initialize dependency logging {e}");
exit(1)
}
};
let turn_on_terminal_logs = matches.is_present("env_log");
if let Err(e) = tracing_subscriber::fmt()
.with_env_filter(filter_layer)
.with_writer(move || {
tracing_subscriber::fmt::writer::OptionalWriter::<std::io::Stdout>::from(
turn_on_terminal_logs.then(std::io::stdout),
)
})
.finish()
.with(logging::MetricsLayer)
.try_init()
{
eprintln!("Failed to initialize dependency logging {e}");
exit(1)
}
let result = get_eth2_network_config(&matches).and_then(|eth2_network_config| {
let eth_spec_id = eth2_network_config.eth_spec_id()?;
@ -534,7 +505,7 @@ fn run<E: EthSpec>(
};
let logger_config = LoggerConfig {
path: log_path,
path: log_path.clone(),
debug_level: String::from(debug_level),
logfile_debug_level: String::from(logfile_debug_level),
log_format: log_format.map(String::from),
@ -557,6 +528,29 @@ fn run<E: EthSpec>(
let log = environment.core_context().log().clone();
let mut tracing_log_path: Option<PathBuf> = clap_utils::parse_optional(matches, "logfile")?;
if tracing_log_path.is_none() {
tracing_log_path = Some(
parse_path_or_default(matches, "datadir")?
.join(DEFAULT_BEACON_NODE_DIR)
.join("logs"),
)
}
let path = tracing_log_path.clone().unwrap();
let turn_on_terminal_logs = matches.is_present("env_log");
// Run a task to clean up old tracing logs.
let log_cleaner_context = environment.service_context("log_cleaner".to_string());
log_cleaner_context.executor.spawn(
logging::cleanup_logging_task(path.clone(), log.clone()),
"log_cleaner",
);
logging::create_tracing_layer(path, turn_on_terminal_logs);
// Allow Prometheus to export the time at which the process was started.
metrics::expose_process_start_time(&log);