diff --git a/Cargo.lock b/Cargo.lock index a7c33a672..0bdb0eb88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2897,6 +2897,7 @@ dependencies = [ "lazy_static", "lighthouse_metrics", "lighthouse_version", + "malloc_utils", "prometheus", "reqwest", "serde", @@ -3748,6 +3749,7 @@ dependencies = [ "lighthouse_metrics", "lighthouse_version", "logging", + "malloc_utils", "remote_signer", "serde_json", "slashing_protection", @@ -3876,6 +3878,17 @@ dependencies = [ "libc", ] +[[package]] +name = "malloc_utils" +version = "0.1.0" +dependencies = [ + "lazy_static", + "libc", + "lighthouse_metrics", + "num_cpus", + "parking_lot", +] + [[package]] name = "maplit" version = "1.0.2" diff --git a/Cargo.toml b/Cargo.toml index f36c7bac8..89f4b0f2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ members = [ "common/lockfile", "common/logging", "common/lru_cache", + "common/malloc_utils", "common/remote_signer_consumer", "common/sensitive_url", "common/slot_clock", diff --git a/beacon_node/http_metrics/Cargo.toml b/beacon_node/http_metrics/Cargo.toml index 783948746..e1746781b 100644 --- a/beacon_node/http_metrics/Cargo.toml +++ b/beacon_node/http_metrics/Cargo.toml @@ -20,6 +20,7 @@ lazy_static = "1.4.0" eth2 = { path = "../../common/eth2" } lighthouse_version = { path = "../../common/lighthouse_version" } warp_utils = { path = "../../common/warp_utils" } +malloc_utils = { path = "../../common/malloc_utils" } [dev-dependencies] tokio = { version = "1.1.0", features = ["sync"] } diff --git a/beacon_node/http_metrics/src/lib.rs b/beacon_node/http_metrics/src/lib.rs index 4837d513b..66c7a6a6f 100644 --- a/beacon_node/http_metrics/src/lib.rs +++ b/beacon_node/http_metrics/src/lib.rs @@ -49,6 +49,7 @@ pub struct Config { pub listen_addr: Ipv4Addr, pub listen_port: u16, pub allow_origin: Option, + pub allocator_metrics_enabled: bool, } impl Default for Config { @@ -58,6 +59,7 @@ impl Default for Config { listen_addr: Ipv4Addr::new(127, 0, 0, 1), listen_port: 5054, allow_origin: None, + allocator_metrics_enabled: true, } } } diff --git a/beacon_node/http_metrics/src/metrics.rs b/beacon_node/http_metrics/src/metrics.rs index 3d1b125ea..4a870c889 100644 --- a/beacon_node/http_metrics/src/metrics.rs +++ b/beacon_node/http_metrics/src/metrics.rs @@ -1,6 +1,7 @@ use crate::Context; use beacon_chain::BeaconChainTypes; use lighthouse_metrics::{Encoder, TextEncoder}; +use malloc_utils::scrape_allocator_metrics; pub use lighthouse_metrics::*; @@ -41,6 +42,12 @@ pub fn gather_prometheus_metrics( warp_utils::metrics::scrape_health_metrics(); + // It's important to ensure these metrics are explicitly enabled in the case that users aren't + // using glibc and this function causes panics. + if ctx.config.allocator_metrics_enabled { + scrape_allocator_metrics(); + } + encoder .encode(&lighthouse_metrics::gather(), &mut buffer) .unwrap(); diff --git a/beacon_node/http_metrics/tests/tests.rs b/beacon_node/http_metrics/tests/tests.rs index db161c6d6..633b81115 100644 --- a/beacon_node/http_metrics/tests/tests.rs +++ b/beacon_node/http_metrics/tests/tests.rs @@ -20,6 +20,7 @@ async fn returns_200_ok() { listen_addr: Ipv4Addr::new(127, 0, 0, 1), listen_port: 0, allow_origin: None, + allocator_metrics_enabled: true, }, chain: None, db_path: None, diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 6e4c9aa7d..b29d6680c 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -1,5 +1,5 @@ use clap::ArgMatches; -use clap_utils::BAD_TESTNET_DIR_MESSAGE; +use clap_utils::{flags::DISABLE_MALLOC_TUNING_FLAG, BAD_TESTNET_DIR_MESSAGE}; use client::{ClientConfig, ClientGenesis}; use directory::{DEFAULT_BEACON_NODE_DIR, DEFAULT_NETWORK_DIR, DEFAULT_ROOT_DIR}; use eth2_libp2p::{multiaddr::Protocol, Enr, Multiaddr, NetworkConfig, PeerIdSerialized}; @@ -156,6 +156,11 @@ pub fn get_config( ); } + // Do not scrape for malloc metrics if we've disabled tuning malloc as it may cause panics. + if cli_args.is_present(DISABLE_MALLOC_TUNING_FLAG) { + client_config.http_metrics.allocator_metrics_enabled = false; + } + /* * Eth1 */ diff --git a/common/clap_utils/src/flags.rs b/common/clap_utils/src/flags.rs new file mode 100644 index 000000000..8ae0cf032 --- /dev/null +++ b/common/clap_utils/src/flags.rs @@ -0,0 +1,3 @@ +//! CLI flags used across the Lighthouse code base can be located here. + +pub const DISABLE_MALLOC_TUNING_FLAG: &str = "disable-malloc-tuning"; diff --git a/common/clap_utils/src/lib.rs b/common/clap_utils/src/lib.rs index 2d43a1b28..dc82cbe66 100644 --- a/common/clap_utils/src/lib.rs +++ b/common/clap_utils/src/lib.rs @@ -6,6 +6,8 @@ use ssz::Decode; use std::path::PathBuf; use std::str::FromStr; +pub mod flags; + pub const BAD_TESTNET_DIR_MESSAGE: &str = "The hard-coded testnet directory was invalid. \ This happens when Lighthouse is migrating between spec versions \ or when there is no default public network to connect to. \ diff --git a/common/malloc_utils/Cargo.toml b/common/malloc_utils/Cargo.toml new file mode 100644 index 000000000..89cc0d362 --- /dev/null +++ b/common/malloc_utils/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "malloc_utils" +version = "0.1.0" +authors = ["Paul Hauner "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lighthouse_metrics = { path = "../lighthouse_metrics" } +lazy_static = "1.4.0" +libc = "0.2.79" +parking_lot = "0.11.0" +num_cpus = "1.13.0" diff --git a/common/malloc_utils/src/glibc.rs b/common/malloc_utils/src/glibc.rs new file mode 100644 index 000000000..9cefc6441 --- /dev/null +++ b/common/malloc_utils/src/glibc.rs @@ -0,0 +1,210 @@ +//! Contains functions for tuning and controlling "The GNU Allocator", included in the `glibc` +//! library. +//! +//! https://www.gnu.org/software/libc/manual/html_node/The-GNU-Allocator.html +//! +//! These functions are generally only suitable for Linux systems. +use lazy_static::lazy_static; +use lighthouse_metrics::*; +use parking_lot::Mutex; +use std::env; +use std::os::raw::c_int; +use std::result::Result; + +/// The value to be provided to `malloc_mmap_threshold`. +/// +/// Value chosen so that values of the validators tree hash cache will *not* be allocated via +/// `mmap`. +/// +/// The size of a single chunk is: +/// +/// NODES_PER_VALIDATOR * VALIDATORS_PER_ARENA * 32 = 15 * 4096 * 32 = 1.875 MiB +const OPTIMAL_MMAP_THRESHOLD: c_int = 2 * 1_024 * 1_024; + +/// The maximum number of arenas allowed to be created by malloc. +/// +/// See `ArenaMaxSetting` docs for details. +const OPTIMAL_ARENA_MAX: ArenaMaxSetting = ArenaMaxSetting::NumCpus; + +/// Constants used to configure malloc internals. +/// +/// Source: +/// +/// https://github.com/lattera/glibc/blob/895ef79e04a953cac1493863bcae29ad85657ee1/malloc/malloc.h#L115-L123 +const M_MMAP_THRESHOLD: c_int = -4; +const M_ARENA_MAX: c_int = -8; + +/// Environment variables used to configure malloc. +/// +/// Source: +/// +/// https://man7.org/linux/man-pages/man3/mallopt.3.html +const ENV_VAR_ARENA_MAX: &str = "MALLOC_ARENA_MAX"; +const ENV_VAR_MMAP_THRESHOLD: &str = "MALLOC_MMAP_THRESHOLD_"; + +#[allow(dead_code)] +enum ArenaMaxSetting { + /// Do not set any value for MALLOC_ARENA_MAX, leave it as default. + DoNotSet, + /// Set a fixed value. + Fixed(c_int), + /// Read the number of CPUs at runtime and use that value. + NumCpus, +} + +lazy_static! { + pub static ref GLOBAL_LOCK: Mutex<()> = <_>::default(); +} + +// Metrics for the malloc. For more information, see: +// +// https://man7.org/linux/man-pages/man3/mallinfo.3.html +lazy_static! { + pub static ref MALLINFO_ARENA: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_arena", + "The total amount of memory allocated by means other than mmap(2). \ + This figure includes both in-use blocks and blocks on the free list.", + ); + pub static ref MALLINFO_ORDBLKS: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_ordblks", + "The number of ordinary (i.e., non-fastbin) free blocks.", + ); + pub static ref MALLINFO_SMBLKS: lighthouse_metrics::Result = + try_create_int_gauge("mallinfo_smblks", "The number of fastbin free blocks.",); + pub static ref MALLINFO_HBLKS: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_hblks", + "The number of blocks currently allocated using mmap.", + ); + pub static ref MALLINFO_HBLKHD: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_hblkhd", + "The number of bytes in blocks currently allocated using mmap.", + ); + pub static ref MALLINFO_FSMBLKS: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_fsmblks", + "The total number of bytes in fastbin free blocks.", + ); + pub static ref MALLINFO_UORDBLKS: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_uordblks", + "The total number of bytes used by in-use allocations.", + ); + pub static ref MALLINFO_FORDBLKS: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_fordblks", + "The total number of bytes in free blocks.", + ); + pub static ref MALLINFO_KEEPCOST: lighthouse_metrics::Result = try_create_int_gauge( + "mallinfo_keepcost", + "The total amount of releasable free space at the top of the heap..", + ); +} + +/// Calls `mallinfo` and updates Prometheus metrics with the results. +pub fn scrape_mallinfo_metrics() { + // The docs for this function say it is thread-unsafe since it may return inconsistent results. + // Since these are just metrics it's not a concern to us if they're sometimes inconsistent. + // + // There exists a `malloc2` function, however it was release in February 2021 and this seems too + // recent to rely on. + // + // Docs: + // + // https://man7.org/linux/man-pages/man3/mallinfo.3.html + let mallinfo = mallinfo(); + + set_gauge(&MALLINFO_ARENA, mallinfo.arena as i64); + set_gauge(&MALLINFO_ORDBLKS, mallinfo.ordblks as i64); + set_gauge(&MALLINFO_SMBLKS, mallinfo.smblks as i64); + set_gauge(&MALLINFO_HBLKS, mallinfo.hblks as i64); + set_gauge(&MALLINFO_HBLKHD, mallinfo.hblkhd as i64); + set_gauge(&MALLINFO_FSMBLKS, mallinfo.fsmblks as i64); + set_gauge(&MALLINFO_UORDBLKS, mallinfo.uordblks as i64); + set_gauge(&MALLINFO_FORDBLKS, mallinfo.fordblks as i64); + set_gauge(&MALLINFO_KEEPCOST, mallinfo.keepcost as i64); +} + +/// Perform all configuration routines. +pub fn configure_glibc_malloc() -> Result<(), String> { + if !env_var_present(ENV_VAR_ARENA_MAX) { + let arena_max = match OPTIMAL_ARENA_MAX { + ArenaMaxSetting::DoNotSet => None, + ArenaMaxSetting::Fixed(n) => Some(n), + ArenaMaxSetting::NumCpus => Some(num_cpus::get() as c_int), + }; + + if let Some(max) = arena_max { + if let Err(e) = malloc_arena_max(max) { + return Err(format!("failed (code {}) to set malloc max arena count", e)); + } + } + } + + if !env_var_present(ENV_VAR_MMAP_THRESHOLD) { + if let Err(e) = malloc_mmap_threshold(OPTIMAL_MMAP_THRESHOLD) { + return Err(format!("failed (code {}) to set malloc mmap threshold", e)); + } + } + + Ok(()) +} + +/// Returns `true` if an environment variable is present. +fn env_var_present(name: &str) -> bool { + env::var(name) != Err(env::VarError::NotPresent) +} + +/// Uses `mallopt` to set the `M_ARENA_MAX` value, specifying the number of memory arenas to be +/// created by malloc. +/// +/// Generally speaking, a smaller arena count reduces memory fragmentation at the cost of memory contention +/// between threads. +/// +/// ## Resources +/// +/// - https://man7.org/linux/man-pages/man3/mallopt.3.html +fn malloc_arena_max(num_arenas: c_int) -> Result<(), c_int> { + into_result(mallopt(M_ARENA_MAX, num_arenas)) +} + +/// Uses `mallopt` to set the `M_MMAP_THRESHOLD` value, specifying the threshold where objects of this +/// size or larger are allocated via an `mmap`. +/// +/// ## Resources +/// +/// - https://man7.org/linux/man-pages/man3/mallopt.3.html +fn malloc_mmap_threshold(num_arenas: c_int) -> Result<(), c_int> { + into_result(mallopt(M_MMAP_THRESHOLD, num_arenas)) +} + +fn mallopt(param: c_int, val: c_int) -> c_int { + // Prevent this function from being called in parallel with any other non-thread-safe function. + let _lock = GLOBAL_LOCK.lock(); + unsafe { libc::mallopt(param, val) } +} + +fn mallinfo() -> libc::mallinfo { + // Prevent this function from being called in parallel with any other non-thread-safe function. + let _lock = GLOBAL_LOCK.lock(); + unsafe { libc::mallinfo() } +} + +fn into_result(result: c_int) -> Result<(), c_int> { + if result == 1 { + Ok(()) + } else { + Err(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn malloc_arena_max_does_not_panic() { + malloc_arena_max(2).unwrap(); + } + + #[test] + fn malloc_mmap_threshold_does_not_panic() { + malloc_mmap_threshold(OPTIMAL_MMAP_THRESHOLD).unwrap(); + } +} diff --git a/common/malloc_utils/src/lib.rs b/common/malloc_utils/src/lib.rs new file mode 100644 index 000000000..b8aed948f --- /dev/null +++ b/common/malloc_utils/src/lib.rs @@ -0,0 +1,47 @@ +//! Provides utilities for configuring the system allocator. +//! +//! ## Conditional Compilation +//! +//! Presently, only configuration for "The GNU Allocator" from `glibc` is supported. All other +//! allocators are ignored. +//! +//! It is assumed that if the following two statements are correct then we should expect to +//! configure `glibc`: +//! +//! - `target_os = linux` +//! - `target_env != musl` +//! +//! In all other cases this library will not attempt to do anything (i.e., all functions are +//! no-ops). +//! +//! If the above conditions are fulfilled but `glibc` still isn't present at runtime then a panic +//! may be triggered. It is understood that there's no way to be certain that a compatible `glibc` +//! is present: https://github.com/rust-lang/rust/issues/33244. +//! +//! ## Notes +//! +//! It's not clear how to precisely determine what the underlying allocator is. The efforts at +//! detecting `glibc` are best-effort. If this crate throws errors about undefined external +//! functions, then try to compile with the `not_glibc_interface` module. + +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +mod glibc; + +pub use interface::*; + +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +mod interface { + pub use crate::glibc::configure_glibc_malloc as configure_memory_allocator; + pub use crate::glibc::scrape_mallinfo_metrics as scrape_allocator_metrics; +} + +#[cfg(any(not(target_os = "linux"), target_env = "musl"))] +mod interface { + #[allow(dead_code, clippy::unnecessary_wraps)] + pub fn configure_memory_allocator() -> Result<(), String> { + Ok(()) + } + + #[allow(dead_code)] + pub fn scrape_allocator_metrics() {} +} diff --git a/lighthouse/Cargo.toml b/lighthouse/Cargo.toml index 73321879a..60009400a 100644 --- a/lighthouse/Cargo.toml +++ b/lighthouse/Cargo.toml @@ -46,6 +46,7 @@ lighthouse_metrics = { path = "../common/lighthouse_metrics" } lazy_static = "1.4.0" serde_json = "1.0.59" task_executor = { path = "../common/task_executor" } +malloc_utils = { path = "../common/malloc_utils" } [dev-dependencies] tempfile = "3.1.0" diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index 636b9d81d..fd299e04e 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -2,10 +2,12 @@ mod metrics; use beacon_node::{get_eth2_network_config, ProductionBeaconNode}; use clap::{App, Arg, ArgMatches}; +use clap_utils::flags::DISABLE_MALLOC_TUNING_FLAG; use env_logger::{Builder, Env}; use environment::EnvironmentBuilder; use eth2_network_config::{Eth2NetworkConfig, DEFAULT_HARDCODED_NETWORK}; use lighthouse_version::VERSION; +use malloc_utils::configure_memory_allocator; use slog::{crit, info, warn}; use std::fs::File; use std::path::PathBuf; @@ -144,6 +146,16 @@ fn main() { Used for testing only, DO NOT USE IN PRODUCTION.") .global(true) ) + .arg( + Arg::with_name(DISABLE_MALLOC_TUNING_FLAG) + .long(DISABLE_MALLOC_TUNING_FLAG) + .help( + "If present, do not configure the system allocator. Providing this flag will \ + generally increase memory usage, it should only be provided when debugging \ + specific memory allocation issues." + ) + .global(true), + ) .subcommand(beacon_node::cli_app()) .subcommand(boot_node::cli_app()) .subcommand(validator_client::cli_app()) @@ -151,6 +163,19 @@ fn main() { .subcommand(remote_signer::cli_app()) .get_matches(); + // Configure the allocator early in the process, before it has the chance to use the default values for + // anything important. + if !matches.is_present(DISABLE_MALLOC_TUNING_FLAG) { + if let Err(e) = configure_memory_allocator() { + eprintln!( + "Unable to configure the memory allocator: {} \n\ + Try providing the --{} flag", + e, DISABLE_MALLOC_TUNING_FLAG + ); + exit(1) + } + } + // Debugging output for libp2p and external crates. if matches.is_present("env_log") { Builder::from_env(Env::default()).init(); diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index d21a4845c..249033377 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -800,6 +800,15 @@ fn slasher_broadcast_flag() { }); } #[test] +pub fn malloc_tuning_flag() { + CommandLineTest::new() + .flag("disable-malloc-tuning", None) + .run() + .with_config(|config| { + assert!(!config.http_metrics.allocator_metrics_enabled); + }); +} +#[test] #[should_panic] fn ensure_panic_on_failed_launch() { CommandLineTest::new() diff --git a/lighthouse/tests/validator_client.rs b/lighthouse/tests/validator_client.rs index 435c45dd1..a0982064b 100644 --- a/lighthouse/tests/validator_client.rs +++ b/lighthouse/tests/validator_client.rs @@ -344,3 +344,11 @@ fn metrics_allow_origin_all_flag() { .run() .with_config(|config| assert_eq!(config.http_metrics.allow_origin, Some("*".to_string()))); } +#[test] +pub fn malloc_tuning_flag() { + CommandLineTest::new() + .flag("disable-malloc-tuning", None) + // Simply ensure that the node can start with this flag, it's very difficult to observe the + // effects of it. + .run(); +}