Add specific log and metric for delayed blocks (#2308)

## Issue Addressed NA ## Proposed Changes - Adds a specific log and metric for when a block is enshrined as head with a delay that will caused bad attestations - We *technically* already expose this information, but it's a little tricky to determine during debugging. This makes it nice and explicit. - Fixes a minor reporting bug with the validator monitor where it was expecting agg. attestations too early (at half-slot rather than two-thirds-slot). ## Additional Info NA
2021-04-13 02:16:59 +00:00 · 2021-04-13 02:16:59 +00:00 · c1203f5e52
commit c1203f5e52
parent 0df7be1814
4 changed files with 40 additions and 5 deletions
--- a/beacon_node/beacon_chain/src/beacon_chain.rs
+++ b/beacon_node/beacon_chain/src/beacon_chain.rs
@ -2214,12 +2214,32 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

        metrics::stop_timer(update_head_timer);

+        let block_delay = get_slot_delay_ms(timestamp_now(), head_slot, &self.slot_clock);
+
        // Observe the delay between the start of the slot and when we set the block as head.
        metrics::observe_duration(
            &metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_TIME,
-            get_slot_delay_ms(timestamp_now(), head_slot, &self.slot_clock),
+            block_delay,
        );

+        // If the block was enshrined as head too late for attestations to be created for it, log a
+        // debug warning and increment a metric.
+        //
+        // Don't create this log if the block was > 4 slots old, this helps prevent noise during
+        // sync.
+        if block_delay >= self.slot_clock.unagg_attestation_production_delay()
+            && block_delay < self.slot_clock.slot_duration() * 4
+        {
+            metrics::inc_counter(&metrics::BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL);
+            debug!(
+                self.log,
+                "Delayed head block";
+                "delay" => ?block_delay,
+                "root" => ?beacon_block_root,
+                "slot" => head_slot,
+            );
+        }
+
        self.snapshot_cache
            .try_write_for(BLOCK_PROCESSING_CACHE_LOCK_TIMEOUT)
            .map(|mut snapshot_cache| {
--- a/beacon_node/beacon_chain/src/metrics.rs
+++ b/beacon_node/beacon_chain/src/metrics.rs
@ -629,6 +629,11 @@ lazy_static! {
        "beacon_block_head_slot_start_delay_time",
        "Duration between the start of the blocks slot and the current time when it was as head.",
    );
+    pub static ref BEACON_BLOCK_HEAD_SLOT_START_DELAY_EXCEEDED_TOTAL: Result<IntCounter> = try_create_int_counter(
+        "beacon_block_head_slot_start_delay_exceeded_total",
+        "Triggered when the duration between the start of the blocks slot and the current time \
+        will result in failed attestations.",
+    );

    /*
     * General block metrics
--- a/beacon_node/beacon_chain/src/validator_monitor.rs
+++ b/beacon_node/beacon_chain/src/validator_monitor.rs
@ -528,8 +528,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
            .start_of(data.slot)
            .and_then(|slot_start| seen_timestamp.checked_sub(slot_start))
            .and_then(|gross_delay| {
-                let production_delay = slot_clock.slot_duration() / 3;
-                gross_delay.checked_sub(production_delay)
+                gross_delay.checked_sub(slot_clock.unagg_attestation_production_delay())
            })
            .unwrap_or_else(|| Duration::from_secs(0))
    }
@ -619,8 +618,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
            .start_of(data.slot)
            .and_then(|slot_start| seen_timestamp.checked_sub(slot_start))
            .and_then(|gross_delay| {
-                let production_delay = slot_clock.slot_duration() / 2;
-                gross_delay.checked_sub(production_delay)
+                gross_delay.checked_sub(slot_clock.agg_attestation_production_delay())
            })
            .unwrap_or_else(|| Duration::from_secs(0))
    }
--- a/common/slot_clock/src/lib.rs
+++ b/common/slot_clock/src/lib.rs
@ -75,4 +75,16 @@ pub trait SlotClock: Send + Sync + Sized + Clone {
        self.slot_of(self.now_duration()?.checked_sub(tolerance)?)
            .or_else(|| Some(self.genesis_slot()))
    }
+
+    /// Returns the delay between the start of the slot and when unaggregated attestations should be
+    /// produced.
+    fn unagg_attestation_production_delay(&self) -> Duration {
+        self.slot_duration() / 3
+    }
+
+    /// Returns the delay between the start of the slot and when aggregated attestations should be
+    /// produced.
+    fn agg_attestation_production_delay(&self) -> Duration {
+        self.slot_duration() * 2 / 3
+    }
 }