From 5cde3fc4daf2eaa6407f16af196c092bf6b1406d Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Fri, 15 Oct 2021 03:28:03 +0000 Subject: [PATCH] Reduce lock contention in backfill sync (#2716) ## Proposed Changes Clone the proposer pubkeys during backfill signature verification to reduce the time that the pubkey cache lock is held for. Cloning such a small number of pubkeys has negligible impact on the total running time, but greatly reduces lock contention. On a Ryzen 5950X, the setup step seems to take around 180us regardless of whether the key is cloned or not, while the verification takes 7ms. When Lighthouse is limited to 10% of one core using `sudo cpulimit --pid --limit 10` the total time jumps up to 800ms, but the setup step remains only 250us. This means that under heavy load this PR could cut the time the lock is held for from 800ms to 250us, which is a huge saving of 99.97%! --- .../beacon_chain/src/historical_blocks.rs | 13 ++++++++++--- beacon_node/beacon_chain/src/metrics.rs | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/beacon_node/beacon_chain/src/historical_blocks.rs b/beacon_node/beacon_chain/src/historical_blocks.rs index 7e75dae53..234e6c64e 100644 --- a/beacon_node/beacon_chain/src/historical_blocks.rs +++ b/beacon_node/beacon_chain/src/historical_blocks.rs @@ -1,4 +1,4 @@ -use crate::{errors::BeaconChainError as Error, BeaconChain, BeaconChainTypes}; +use crate::{errors::BeaconChainError as Error, metrics, BeaconChain, BeaconChainTypes}; use itertools::Itertools; use slog::debug; use state_processing::{ @@ -136,6 +136,8 @@ impl BeaconChain { // Verify signatures in one batch, holding the pubkey cache lock for the shortest duration // possible. For each block fetch the parent root from its successor. Slicing from index 1 // is safe because we've already checked that `blocks_to_import` is non-empty. + let sig_timer = metrics::start_timer(&metrics::BACKFILL_SIGNATURE_TOTAL_TIMES); + let setup_timer = metrics::start_timer(&metrics::BACKFILL_SIGNATURE_SETUP_TIMES); let pubkey_cache = self .validator_pubkey_cache .try_read_for(PUBKEY_CACHE_LOCK_TIMEOUT) @@ -156,17 +158,22 @@ impl BeaconChain { block.message().proposer_index(), &self.spec.fork_at_epoch(block.message().epoch()), self.genesis_validators_root, - |validator_index| pubkey_cache.get(validator_index).map(Cow::Borrowed), + |validator_index| pubkey_cache.get(validator_index).cloned().map(Cow::Owned), &self.spec, ) }) .collect::, _>>() .map_err(HistoricalBlockError::SignatureSet) .map(ParallelSignatureSets::from)?; + drop(pubkey_cache); + drop(setup_timer); + + let verify_timer = metrics::start_timer(&metrics::BACKFILL_SIGNATURE_VERIFY_TIMES); if !signature_set.verify() { return Err(HistoricalBlockError::InvalidSignature.into()); } - drop(pubkey_cache); + drop(verify_timer); + drop(sig_timer); // Write the I/O batches to disk, writing the blocks themselves first, as it's better // for the hot DB to contain extra blocks than for the cold DB to point to blocks that diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 136002cb8..2128eace4 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -855,6 +855,22 @@ lazy_static! { "beacon_sync_committee_message_processing_signature_seconds", "Time spent on the signature verification of sync message processing" ); + + /* + * Checkpoint sync & backfill + */ + pub static ref BACKFILL_SIGNATURE_SETUP_TIMES: Result = try_create_histogram( + "beacon_backfill_signature_setup_seconds", + "Time spent constructing the signature set during backfill sync" + ); + pub static ref BACKFILL_SIGNATURE_VERIFY_TIMES: Result = try_create_histogram( + "beacon_backfill_signature_verify_seconds", + "Time spent verifying the signature set during backfill sync" + ); + pub static ref BACKFILL_SIGNATURE_TOTAL_TIMES: Result = try_create_histogram( + "beacon_backfill_signature_total_seconds", + "Time spent verifying the signature set during backfill sync, including setup" + ); } /// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,