Avoid penalizing peers for delays during processing (#2894)
## Issue Addressed

NA

## Proposed Changes

We have observed occasions where under-resourced nodes receive messages that were valid *at the time*, but later became invalidated due to long waits for a `BeaconProcessor` worker.

In this PR, we check whether the message was valid *at the time of receipt*. If it was initially valid but is invalid now, we simply ignore the message without penalizing the peer.

## Additional Info

NA
parent: b656007963
commit: 61f60bdf03
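To sketch the idea before the diff: the handler keeps the `seen_timestamp` recorded when the message arrived, builds a slot clock frozen at that instant, and only penalizes the peer if the message would have failed the slot-range check even then. Below is a minimal, self-contained illustration assuming the Lighthouse `slot_clock` and `types` crates are available as dependencies (plus this PR's `freeze_at` helper); `would_have_been_valid_at_receipt` and its 32-slot window are hypothetical stand-ins for the real `verify_propagation_slot_range` check.

```rust
use std::time::Duration;

use slot_clock::{ManualSlotClock, SlotClock};
use types::Slot;

// Hypothetical stand-in for `verify_propagation_slot_range`: accept the message if its
// slot is no more than 32 slots (one mainnet epoch) behind the clock's current slot.
fn would_have_been_valid_at_receipt<S: SlotClock>(clock: &S, message_slot: Slot) -> bool {
    clock
        .now()
        .map(|current_slot| message_slot + 32 >= current_slot)
        .unwrap_or(false)
}

fn main() {
    let slot_duration = Duration::from_secs(12);
    // A clock whose "wall time" is set explicitly (genesis slot 0 at t = 0).
    let clock = ManualSlotClock::new(Slot::new(0), Duration::from_secs(0), slot_duration);

    // Pretend the message arrived at slot 100 ...
    let seen_timestamp = slot_duration * 100;
    // ... but a busy `BeaconProcessor` only got to it at slot 150, when it is stale.
    clock.set_current_time(slot_duration * 150);

    // Re-run the check against a clock frozen at the receipt time.
    let frozen = clock.freeze_at(seen_timestamp);
    let valid_then = would_have_been_valid_at_receipt(&frozen, Slot::new(100));
    let valid_now = would_have_been_valid_at_receipt(&clock, Slot::new(100));

    // Valid at receipt but stale now: ignore the message, do not penalize the peer.
    println!("valid at receipt: {valid_then}, valid now: {valid_now}");
}
```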
```diff
@@ -452,7 +452,7 @@ impl<'a, T: BeaconChainTypes> IndexedAggregatedAttestation<'a, T> {
         // MAXIMUM_GOSSIP_CLOCK_DISPARITY allowance).
         //
         // We do not queue future attestations for later processing.
-        verify_propagation_slot_range(chain, attestation)?;
+        verify_propagation_slot_range(&chain.slot_clock, attestation)?;
 
         // Check the attestation's epoch matches its target.
         if attestation.data.slot.epoch(T::EthSpec::slots_per_epoch())
@@ -716,7 +716,7 @@ impl<'a, T: BeaconChainTypes> IndexedUnaggregatedAttestation<'a, T> {
         // MAXIMUM_GOSSIP_CLOCK_DISPARITY allowance).
         //
         // We do not queue future attestations for later processing.
-        verify_propagation_slot_range(chain, attestation)?;
+        verify_propagation_slot_range(&chain.slot_clock, attestation)?;
 
         // Check to ensure that the attestation is "unaggregated". I.e., it has exactly one
         // aggregation bit set.
@@ -1019,14 +1019,13 @@ fn verify_head_block_is_known<T: BeaconChainTypes>(
 /// to the current slot of the `chain`.
 ///
 /// Accounts for `MAXIMUM_GOSSIP_CLOCK_DISPARITY`.
-pub fn verify_propagation_slot_range<T: BeaconChainTypes>(
-    chain: &BeaconChain<T>,
-    attestation: &Attestation<T::EthSpec>,
+pub fn verify_propagation_slot_range<S: SlotClock, E: EthSpec>(
+    slot_clock: &S,
+    attestation: &Attestation<E>,
 ) -> Result<(), Error> {
     let attestation_slot = attestation.data.slot;
 
-    let latest_permissible_slot = chain
-        .slot_clock
+    let latest_permissible_slot = slot_clock
         .now_with_future_tolerance(MAXIMUM_GOSSIP_CLOCK_DISPARITY)
         .ok_or(BeaconChainError::UnableToReadSlot)?;
     if attestation_slot > latest_permissible_slot {
@@ -1037,11 +1036,10 @@ pub fn verify_propagation_slot_range<T: BeaconChainTypes>(
     }
 
     // Taking advantage of saturating subtraction on `Slot`.
-    let earliest_permissible_slot = chain
-        .slot_clock
+    let earliest_permissible_slot = slot_clock
         .now_with_past_tolerance(MAXIMUM_GOSSIP_CLOCK_DISPARITY)
         .ok_or(BeaconChainError::UnableToReadSlot)?
-        - T::EthSpec::slots_per_epoch();
+        - E::slots_per_epoch();
     if attestation_slot < earliest_permissible_slot {
         return Err(Error::PastSlot {
             attestation_slot,
```
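The change above makes the attestation slot-range check generic over any `SlotClock` rather than requiring a full `BeaconChain`, which is what lets the network worker re-run it against a frozen clock. Here is a rough sketch of the window the function enforces, computed directly from a `ManualSlotClock`; the `MAX_DISPARITY` constant and the arithmetic are illustrative, not the crate's actual values.

```rust
use std::time::Duration;

use slot_clock::{ManualSlotClock, SlotClock};
use types::{EthSpec, MainnetEthSpec, Slot};

// Illustrative stand-in for `MAXIMUM_GOSSIP_CLOCK_DISPARITY`.
const MAX_DISPARITY: Duration = Duration::from_millis(500);

fn main() {
    let clock = ManualSlotClock::new(Slot::new(0), Duration::from_secs(0), Duration::from_secs(12));
    clock.set_current_time(Duration::from_secs(12 * 200)); // the clock currently reads slot 200

    // Latest permissible slot: "now", nudged forwards by the clock-disparity allowance.
    let latest = clock.now_with_future_tolerance(MAX_DISPARITY).unwrap();
    // Earliest permissible slot: "now" nudged backwards, minus one epoch (saturating).
    let earliest =
        clock.now_with_past_tolerance(MAX_DISPARITY).unwrap() - MainnetEthSpec::slots_per_epoch();

    // An attestation for slot 150 falls outside [earliest, latest] and would be rejected.
    let attestation_slot = Slot::new(150);
    let in_range = attestation_slot >= earliest && attestation_slot <= latest;
    println!("window: [{earliest}, {latest}], slot 150 in range: {in_range}");
}
```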
```diff
@@ -273,7 +273,7 @@ impl<T: BeaconChainTypes> VerifiedSyncContribution<T> {
         let subcommittee_index = contribution.subcommittee_index as usize;
 
         // Ensure sync committee contribution is within the MAXIMUM_GOSSIP_CLOCK_DISPARITY allowance.
-        verify_propagation_slot_range(chain, contribution)?;
+        verify_propagation_slot_range(&chain.slot_clock, contribution)?;
 
         // Validate subcommittee index.
         if contribution.subcommittee_index >= SYNC_COMMITTEE_SUBNET_COUNT {
@@ -428,7 +428,7 @@ impl VerifiedSyncCommitteeMessage {
         // MAXIMUM_GOSSIP_CLOCK_DISPARITY allowance).
         //
         // We do not queue future sync committee messages for later processing.
-        verify_propagation_slot_range(chain, &sync_message)?;
+        verify_propagation_slot_range(&chain.slot_clock, &sync_message)?;
 
         // Ensure the `subnet_id` is valid for the given validator.
         let pubkey = chain
@@ -516,14 +516,13 @@ impl VerifiedSyncCommitteeMessage {
 /// to the current slot of the `chain`.
 ///
 /// Accounts for `MAXIMUM_GOSSIP_CLOCK_DISPARITY`.
-pub fn verify_propagation_slot_range<T: BeaconChainTypes, U: SlotData>(
-    chain: &BeaconChain<T>,
+pub fn verify_propagation_slot_range<S: SlotClock, U: SlotData>(
+    slot_clock: &S,
     sync_contribution: &U,
 ) -> Result<(), Error> {
     let message_slot = sync_contribution.get_slot();
 
-    let latest_permissible_slot = chain
-        .slot_clock
+    let latest_permissible_slot = slot_clock
         .now_with_future_tolerance(MAXIMUM_GOSSIP_CLOCK_DISPARITY)
         .ok_or(BeaconChainError::UnableToReadSlot)?;
     if message_slot > latest_permissible_slot {
@@ -533,8 +532,7 @@ pub fn verify_propagation_slot_range<T: BeaconChainTypes, U: SlotData>(
         });
     }
 
-    let earliest_permissible_slot = chain
-        .slot_clock
+    let earliest_permissible_slot = slot_clock
         .now_with_past_tolerance(MAXIMUM_GOSSIP_CLOCK_DISPARITY)
         .ok_or(BeaconChainError::UnableToReadSlot)?;
 
```
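One design difference from the attestation version is visible here: the sync-committee check does not subtract an epoch, so the earliest permissible slot is essentially the current slot (less the disparity allowance), and a message one slot old is already a `PastSlot` failure. That is why the gossip handler below keeps a one-slot tolerance before penalizing. A rough illustration, under the same assumptions and illustrative constants as the previous sketch:

```rust
use std::time::Duration;

use slot_clock::{ManualSlotClock, SlotClock};
use types::Slot;

// Illustrative stand-in for `MAXIMUM_GOSSIP_CLOCK_DISPARITY`.
const MAX_DISPARITY: Duration = Duration::from_millis(500);

fn main() {
    let clock = ManualSlotClock::new(Slot::new(0), Duration::from_secs(0), Duration::from_secs(12));
    clock.set_current_time(Duration::from_secs(12 * 200 + 6)); // mid-way through slot 200

    // No epoch subtraction here: the window is roughly just the current slot.
    let latest = clock.now_with_future_tolerance(MAX_DISPARITY).unwrap();
    let earliest = clock.now_with_past_tolerance(MAX_DISPARITY).unwrap();

    // A sync committee message for slot 199 already falls outside the window.
    let message_slot = Slot::new(199);
    let in_range = message_slot >= earliest && message_slot <= latest;
    println!("window: [{earliest}, {latest}], slot 199 in range: {in_range}");
}
```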
```diff
@@ -2,9 +2,9 @@ use crate::{metrics, service::NetworkMessage, sync::SyncMessage};
 
 use beacon_chain::store::Error;
 use beacon_chain::{
-    attestation_verification::{Error as AttnError, VerifiedAttestation},
+    attestation_verification::{self, Error as AttnError, VerifiedAttestation},
     observed_operations::ObservationOutcome,
-    sync_committee_verification::Error as SyncCommitteeError,
+    sync_committee_verification::{self, Error as SyncCommitteeError},
     validator_monitor::get_block_delay_ms,
     BeaconChainError, BeaconChainTypes, BlockError, ExecutionPayloadError, ForkChoiceError,
     GossipVerifiedBlock,
@@ -19,7 +19,7 @@ use tokio::sync::mpsc;
 use types::{
     Attestation, AttesterSlashing, EthSpec, Hash256, IndexedAttestation, ProposerSlashing,
     SignedAggregateAndProof, SignedBeaconBlock, SignedContributionAndProof, SignedVoluntaryExit,
-    SubnetId, SyncCommitteeMessage, SyncSubnetId,
+    Slot, SubnetId, SyncCommitteeMessage, SyncSubnetId,
 };
 
 use super::{
@@ -100,12 +100,7 @@ enum FailedAtt<T: EthSpec> {
 
 impl<T: EthSpec> FailedAtt<T> {
     pub fn beacon_block_root(&self) -> &Hash256 {
-        match self {
-            FailedAtt::Unaggregate { attestation, .. } => &attestation.data.beacon_block_root,
-            FailedAtt::Aggregate { attestation, .. } => {
-                &attestation.message.aggregate.data.beacon_block_root
-            }
-        }
+        &self.attestation().data.beacon_block_root
     }
 
     pub fn kind(&self) -> &'static str {
@@ -114,6 +109,13 @@ impl<T: EthSpec> FailedAtt<T> {
             FailedAtt::Aggregate { .. } => "aggregated",
         }
     }
+
+    pub fn attestation(&self) -> &Attestation<T> {
+        match self {
+            FailedAtt::Unaggregate { attestation, .. } => attestation,
+            FailedAtt::Aggregate { attestation, .. } => &attestation.message.aggregate,
+        }
+    }
 }
 
 /// Items required to verify a batch of unaggregated gossip attestations.
@@ -410,6 +412,7 @@ impl<T: BeaconChainTypes> Worker<T> {
                     },
                     reprocess_tx,
                     error,
+                    seen_timestamp,
                 );
             }
         }
@@ -608,6 +611,7 @@ impl<T: BeaconChainTypes> Worker<T> {
                     },
                     reprocess_tx,
                     error,
+                    seen_timestamp,
                 );
             }
         }
@@ -1117,6 +1121,7 @@ impl<T: BeaconChainTypes> Worker<T> {
         subnet_id: SyncSubnetId,
         seen_timestamp: Duration,
     ) {
+        let message_slot = sync_signature.slot;
         let sync_signature = match self
             .chain
             .verify_sync_committee_message_for_gossip(sync_signature, subnet_id)
@@ -1128,6 +1133,8 @@ impl<T: BeaconChainTypes> Worker<T> {
                     message_id,
                     "sync_signature",
                     e,
+                    message_slot,
+                    seen_timestamp,
                 );
                 return;
             }
@@ -1177,6 +1184,7 @@ impl<T: BeaconChainTypes> Worker<T> {
         sync_contribution: SignedContributionAndProof<T::EthSpec>,
         seen_timestamp: Duration,
     ) {
+        let contribution_slot = sync_contribution.message.contribution.slot;
        let sync_contribution = match self
            .chain
            .verify_sync_contribution_for_gossip(sync_contribution)
@@ -1189,6 +1197,8 @@ impl<T: BeaconChainTypes> Worker<T> {
                     message_id,
                     "sync_contribution",
                     e,
+                    contribution_slot,
+                    seen_timestamp,
                 );
                 return;
             }
@@ -1232,6 +1242,7 @@ impl<T: BeaconChainTypes> Worker<T> {
         failed_att: FailedAtt<T::EthSpec>,
         reprocess_tx: Option<mpsc::Sender<ReprocessQueueMessage<T>>>,
         error: AttnError,
+        seen_timestamp: Duration,
     ) {
         let beacon_block_root = failed_att.beacon_block_root();
         let attestation_type = failed_att.kind();
@@ -1239,8 +1250,7 @@ impl<T: BeaconChainTypes> Worker<T> {
         match &error {
             AttnError::FutureEpoch { .. }
             | AttnError::PastEpoch { .. }
-            | AttnError::FutureSlot { .. }
-            | AttnError::PastSlot { .. } => {
+            | AttnError::FutureSlot { .. } => {
                 /*
                  * These errors can be triggered by a mismatch between our slot and the peer.
                  *
@@ -1262,6 +1272,24 @@ impl<T: BeaconChainTypes> Worker<T> {
                 // Do not propagate these messages.
                 self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
             }
+            AttnError::PastSlot { .. } => {
+                // Produce a slot clock frozen at the time we received the message from the
+                // network.
+                let seen_clock = &self.chain.slot_clock.freeze_at(seen_timestamp);
+                let hindsight_verification =
+                    attestation_verification::verify_propagation_slot_range(
+                        seen_clock,
+                        failed_att.attestation(),
+                    );
+
+                // Only penalize the peer if it would have been invalid at the moment we received
+                // it.
+                if hindsight_verification.is_err() {
+                    self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError);
+                }
+
+                self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
+            }
             AttnError::InvalidSelectionProof { .. } | AttnError::InvalidSignature => {
                 /*
                  * These errors are caused by invalid signatures.
@@ -1625,6 +1653,8 @@ impl<T: BeaconChainTypes> Worker<T> {
         message_id: MessageId,
         message_type: &str,
         error: SyncCommitteeError,
+        sync_committee_message_slot: Slot,
+        seen_timestamp: Duration,
     ) {
         metrics::register_sync_committee_error(&error);
 
@@ -1650,10 +1680,7 @@ impl<T: BeaconChainTypes> Worker<T> {
                 // Do not propagate these messages.
                 self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
             }
-            SyncCommitteeError::PastSlot {
-                message_slot,
-                earliest_permissible_slot,
-            } => {
+            SyncCommitteeError::PastSlot { .. } => {
                 /*
                  * This error can be triggered by a mismatch between our slot and the peer.
                  *
@@ -1667,12 +1694,34 @@ impl<T: BeaconChainTypes> Worker<T> {
                     "type" => ?message_type,
                 );
 
-                // We tolerate messages that were just one slot late.
-                if *message_slot + 1 < *earliest_permissible_slot {
+                // Compute the slot when we received the message.
+                let received_slot = self
+                    .chain
+                    .slot_clock
+                    .slot_of(seen_timestamp)
+                    .unwrap_or_else(|| self.chain.slot_clock.genesis_slot());
+
+                // The message is "excessively" late if it was more than one slot late.
+                let excessively_late = received_slot > sync_committee_message_slot + 1;
+
+                // This closure will lazily produce a slot clock frozen at the time we received the
+                // message from the network and return a bool indicating if the message was invalid
+                // at the time of receipt too.
+                let invalid_in_hindsight = || {
+                    let seen_clock = &self.chain.slot_clock.freeze_at(seen_timestamp);
+                    let hindsight_verification =
+                        sync_committee_verification::verify_propagation_slot_range(
+                            seen_clock,
+                            &sync_committee_message_slot,
+                        );
+                    hindsight_verification.is_err()
+                };
+
+                // Penalize the peer if the message was more than one slot late
+                if excessively_late && invalid_in_hindsight() {
                     self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError);
                 }
 
                 // Do not propagate these messages.
                 self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
             }
             SyncCommitteeError::EmptyAggregationBitfield => {
```
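The `excessively_late` check above is plain `Slot` arithmetic. A tiny self-contained sketch of the "more than one slot late" condition, using made-up slot values and assuming the Lighthouse `types` crate:

```rust
use types::Slot;

fn main() {
    // The slot the sync committee message was created for.
    let message_slot = Slot::new(100);

    for received_slot in [Slot::new(100), Slot::new(101), Slot::new(102)] {
        // Mirrors `received_slot > sync_committee_message_slot + 1` from the handler:
        // one slot of lateness is tolerated, two or more is "excessively" late.
        let excessively_late = received_slot > message_slot + 1;
        println!("received at slot {received_slot}: excessively late = {excessively_late}");
    }
}
```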
```diff
@@ -112,4 +112,18 @@ pub trait SlotClock: Send + Sync + Sized + Clone {
             Duration::from_secs(duration_into_slot.as_secs() % seconds_per_slot)
         })
     }
+
+    /// Produces a *new* slot clock with the same configuration of `self`, except that clock is
+    /// "frozen" at the `freeze_at` time.
+    ///
+    /// This is useful for observing the slot clock at arbitrary fixed points in time.
+    fn freeze_at(&self, freeze_at: Duration) -> ManualSlotClock {
+        let slot_clock = ManualSlotClock::new(
+            self.genesis_slot(),
+            self.genesis_duration(),
+            self.slot_duration(),
+        );
+        slot_clock.set_current_time(freeze_at);
+        slot_clock
+    }
 }
```
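A possible usage sketch of the new `freeze_at` helper (again assuming the `slot_clock` and `types` crates as dependencies): the returned `ManualSlotClock` keeps reporting the slot at the freeze point even as the original clock moves on.

```rust
use std::time::Duration;

use slot_clock::{ManualSlotClock, SlotClock};
use types::Slot;

fn main() {
    let slot_duration = Duration::from_secs(12);
    let clock = ManualSlotClock::new(Slot::new(0), Duration::from_secs(0), slot_duration);

    // Freeze a copy of the clock at the instant slot 42 began.
    let frozen = clock.freeze_at(slot_duration * 42);

    // Advance the original clock; the frozen copy is unaffected.
    clock.set_current_time(slot_duration * 100);

    assert_eq!(clock.now(), Some(Slot::new(100)));
    assert_eq!(frozen.now(), Some(Slot::new(42)));
    println!("live: {:?}, frozen: {:?}", clock.now(), frozen.now());
}
```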