lighthouse/beacon_node/beacon_chain/src/observed_aggregates.rs
Pawan Dhananjay 448d3ec9b3 Aggregate subsets (#3493)
## Issue Addressed

Resolves #3238 

## Proposed Changes

Please list or describe the changes introduced by this PR.

## Additional Info

Please provide any additional information. For example, future considerations
or information useful for reviewers.
2023-06-27 01:06:49 +00:00

611 lines
22 KiB
Rust

//! Provides an `ObservedAggregates` struct which allows us to reject aggregated attestations or
//! sync committee contributions if we've already seen them.
use crate::sync_committee_verification::SyncCommitteeData;
use ssz_types::{BitList, BitVector};
use std::collections::HashMap;
use std::marker::PhantomData;
use tree_hash::TreeHash;
use types::consts::altair::{
SYNC_COMMITTEE_SUBNET_COUNT, TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE,
};
use types::slot_data::SlotData;
use types::{Attestation, EthSpec, Hash256, Slot, SyncCommitteeContribution};
/// An `ObservedAggregates` cache specialised for sync committee contributions.
///
/// The per-root items stored for subset comparison are `BitVector`s sized by
/// `SyncSubcommitteeSize`.
pub type ObservedSyncContributions<E> = ObservedAggregates<
SyncCommitteeContribution<E>,
E,
BitVector<<E as types::EthSpec>::SyncSubcommitteeSize>,
>;
/// An `ObservedAggregates` cache specialised for aggregate attestations.
///
/// The per-root items stored for subset comparison are `BitList`s bounded by
/// `MaxValidatorsPerCommittee`.
pub type ObservedAggregateAttestations<E> = ObservedAggregates<
Attestation<E>,
E,
BitList<<E as types::EthSpec>::MaxValidatorsPerCommittee>,
>;
/// A trait used to associate capacity constants with the type being stored in
/// `ObservedAggregates`.
pub trait Consts {
/// The default capacity of items stored per slot, in a single `SlotHashSet`.
///
/// Used as the initial allocation when no earlier slot is available to estimate from.
const DEFAULT_PER_SLOT_CAPACITY: usize;
/// The maximum number of slots for which items are retained.
///
/// `ObservedAggregates` adds `2` to this value to allow for one slot of clock disparity on
/// either side of the range.
fn max_slot_capacity() -> usize;
/// The maximum number of items stored per slot, in a single `SlotHashSet`.
fn max_per_slot_capacity() -> usize;
}
/// Capacity limits applied when caching observed aggregate attestations.
impl<T: EthSpec> Consts for Attestation<T> {
/// Use 128 as it's the target committee size for the mainnet spec. This is perhaps a little
/// wasteful for the minimal spec, but considering it's approx. 128 * 32 bytes we're not wasting
/// much.
const DEFAULT_PER_SLOT_CAPACITY: usize = 128;
/// We need to keep attestations for each slot of the current epoch.
fn max_slot_capacity() -> usize {
T::slots_per_epoch() as usize
}
/// As a DoS protection measure, the maximum number of distinct `Attestation` roots that will
/// be recorded for each slot.
///
/// Currently this is set to ~524k. If we say that each entry is 40 bytes (Hash256 (32 bytes) + an
/// 8 byte hash) then this comes to about 20mb per slot. If we're storing 34 of these slots, then
/// we're at 680mb. This is a lot of memory usage, but probably not a show-stopper for most
/// reasonable hardware.
///
/// Upstream conditions should strongly restrict the amount of attestations that can show up in
/// this pool. The maximum size with respect to upstream restrictions is more likely on the order
/// of the number of validators.
fn max_per_slot_capacity() -> usize {
1 << 19 // 524,288
}
}
/// Capacity limits applied when caching observed sync committee contributions.
impl<T: EthSpec> Consts for SyncCommitteeContribution<T> {
/// Set to `TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE * SYNC_COMMITTEE_SUBNET_COUNT`. This is the
/// expected number of aggregators per slot across all subcommittees.
const DEFAULT_PER_SLOT_CAPACITY: usize =
(SYNC_COMMITTEE_SUBNET_COUNT * TARGET_AGGREGATORS_PER_SYNC_SUBCOMMITTEE) as usize;
/// We only need to keep contributions related to the current slot.
fn max_slot_capacity() -> usize {
1
}
/// We should never receive more aggregates than there are sync committee participants, so the
/// per-slot limit is the sync committee size.
fn max_per_slot_capacity() -> usize {
T::sync_committee_size()
}
}
/// A trait for types that implement a behaviour where one object of that type
/// can be a subset/superset of another.
///
/// This trait allows us to be generic over the aggregate item that we store in the cache and
/// for which we want to prevent duplicates/subsets.
pub trait SubsetItem {
/// The item that is stored for later comparison with new incoming aggregate items.
type Item;
/// Returns `true` if `self` is a non-strict subset of `other` and `false` otherwise.
fn is_subset(&self, other: &Self::Item) -> bool;
/// Returns `true` if `self` is a non-strict superset of `other` and `false` otherwise.
fn is_superset(&self, other: &Self::Item) -> bool;
/// Returns the item that gets stored in `ObservedAggregates` for later subset
/// comparison with incoming aggregates.
fn get_item(&self) -> Self::Item;
/// Returns a unique value that keys the object to the item that is being stored
/// in `ObservedAggregates`.
fn root(&self) -> Hash256;
}
/// Subset semantics for aggregate attestations, based on their aggregation bits.
impl<T: EthSpec> SubsetItem for Attestation<T> {
    type Item = BitList<T::MaxValidatorsPerCommittee>;

    /// Returns `true` if this attestation's aggregation bits are a non-strict subset of `other`.
    fn is_subset(&self, other: &Self::Item) -> bool {
        let own_bits = &self.aggregation_bits;
        own_bits.is_subset(other)
    }

    /// Returns `true` if `other` is a non-strict subset of this attestation's aggregation bits.
    fn is_superset(&self, other: &Self::Item) -> bool {
        let own_bits = &self.aggregation_bits;
        other.is_subset(own_bits)
    }

    /// Returns a copy of the attestation's aggregation bits.
    fn get_item(&self) -> Self::Item {
        let own_bits = &self.aggregation_bits;
        own_bits.clone()
    }

    /// Returns the hash tree root of the attestation data.
    fn root(&self) -> Hash256 {
        let data = &self.data;
        data.tree_hash_root()
    }
}
/// Subset semantics for sync committee contributions, based on their aggregation bits.
impl<T: EthSpec> SubsetItem for SyncCommitteeContribution<T> {
    type Item = BitVector<T::SyncSubcommitteeSize>;

    /// Returns `true` if this contribution's aggregation bits are a non-strict subset of `other`.
    fn is_subset(&self, other: &Self::Item) -> bool {
        let own_bits = &self.aggregation_bits;
        own_bits.is_subset(other)
    }

    /// Returns `true` if `other` is a non-strict subset of this contribution's aggregation bits.
    fn is_superset(&self, other: &Self::Item) -> bool {
        let own_bits = &self.aggregation_bits;
        other.is_subset(own_bits)
    }

    /// Returns a copy of the sync contribution's aggregation bits.
    fn get_item(&self) -> Self::Item {
        let own_bits = &self.aggregation_bits;
        own_bits.clone()
    }

    /// Returns the hash tree root of the block root, slot and subcommittee index
    /// of the sync contribution.
    fn root(&self) -> Hash256 {
        let key = SyncCommitteeData {
            root: self.beacon_block_root,
            slot: self.slot,
            subcommittee_index: self.subcommittee_index,
        };
        key.tree_hash_root()
    }
}
/// The result of attempting to record an item in the cache.
#[derive(Debug, PartialEq)]
pub enum ObserveOutcome {
/// This item is a non-strict subset of an already known item.
Subset,
/// This item was not covered by the stored items it was compared against and has been
/// recorded, either as a new entry or by replacing a stored entry that it is a superset of.
New,
}
/// Errors returned when observing or querying items in the cache.
#[derive(Debug, PartialEq)]
pub enum Error {
/// The item's slot is lower than the lowest slot the cache currently accepts.
SlotTooLow {
slot: Slot,
lowest_permissible_slot: Slot,
},
/// The function to obtain a set index failed, this is an internal error.
InvalidSetIndex(usize),
/// We have reached the maximum number of unique items that can be observed in a slot.
/// This is a DoS protection measure. The payload is the capacity that was reached.
ReachedMaxObservationsPerSlot(usize),
/// The item's slot does not match the slot of the `SlotHashSet` it was passed to.
///
/// `attestation` holds the slot of the offending item.
IncorrectSlot {
expected: Slot,
attestation: Slot,
},
}
/// A `HashMap`-backed collection that contains entries related to a single `Slot`.
struct SlotHashSet<I> {
/// Maps a root to a vector of maximally-sized aggregation bitfields/bitvectors
/// such that no bitfield/bitvector is a subset of any other in the list.
map: HashMap<Hash256, Vec<I>>,
/// The slot that every item stored in `map` must belong to.
slot: Slot,
/// The maximum number of distinct roots (`map` keys) before new entries are rejected.
max_capacity: usize,
}
impl<I> SlotHashSet<I> {
pub fn new(slot: Slot, initial_capacity: usize, max_capacity: usize) -> Self {
Self {
slot,
map: HashMap::with_capacity(initial_capacity),
max_capacity,
}
}
/// Store the items in self so future observations recognise its existence.
pub fn observe_item<S: SlotData + SubsetItem<Item = I>>(
&mut self,
item: &S,
root: Hash256,
) -> Result<ObserveOutcome, Error> {
if item.get_slot() != self.slot {
return Err(Error::IncorrectSlot {
expected: self.slot,
attestation: item.get_slot(),
});
}
if let Some(aggregates) = self.map.get_mut(&root) {
for existing in aggregates {
// Check if `item` is a subset of any of the observed aggregates
if item.is_subset(existing) {
return Ok(ObserveOutcome::Subset);
// Check if `item` is a superset of any of the observed aggregates
// If true, we replace the new item with its existing subset. This allows us
// to hold fewer items in the list.
} else if item.is_superset(existing) {
*existing = item.get_item();
return Ok(ObserveOutcome::New);
}
}
}
// Here we check to see if this slot has reached the maximum observation count.
//
// The resulting behaviour is that we are no longer able to successfully observe new
// items, however we will continue to return `is_known_subset` values. We could also
// disable `is_known_subset`, however then we would stop forwarding items across the
// gossip network and I think that this is a worse case than sending some invalid ones.
// The underlying libp2p network is responsible for removing duplicate messages, so
// this doesn't risk a broadcast loop.
if self.map.len() >= self.max_capacity {
return Err(Error::ReachedMaxObservationsPerSlot(self.max_capacity));
}
let item = item.get_item();
self.map.entry(root).or_default().push(item);
Ok(ObserveOutcome::New)
}
/// Check if `item` is a non-strict subset of any of the already observed aggregates for
/// the given root and slot.
pub fn is_known_subset<S: SlotData + SubsetItem<Item = I>>(
&self,
item: &S,
root: Hash256,
) -> Result<bool, Error> {
if item.get_slot() != self.slot {
return Err(Error::IncorrectSlot {
expected: self.slot,
attestation: item.get_slot(),
});
}
Ok(self
.map
.get(&root)
.map_or(false, |agg| agg.iter().any(|val| item.is_subset(val))))
}
/// The number of observed items in `self`.
pub fn len(&self) -> usize {
self.map.len()
}
}
/// Stores the aggregation bits of observed objects, keyed by root, for some number of `Slots`,
/// so we can determine if these (or supersets of them) have previously been seen on the network.
pub struct ObservedAggregates<T: SlotData + Consts, E: EthSpec, I> {
/// Items with a slot lower than this are rejected with `Error::SlotTooLow`.
lowest_permissible_slot: Slot,
/// One `SlotHashSet` per retained slot, in no particular order.
sets: Vec<SlotHashSet<I>>,
/// Marker tying the cache to an `EthSpec` without storing a value of it.
_phantom_spec: PhantomData<E>,
/// Marker tying the cache to the observed type `T` without storing a value of it.
_phantom_tree_hash: PhantomData<T>,
}
impl<T: SlotData + Consts, E: EthSpec, I> Default for ObservedAggregates<T, E, I> {
    /// Creates an empty cache: no per-slot sets are allocated and every slot from zero upwards
    /// is permitted.
    fn default() -> Self {
        let lowest_permissible_slot = Slot::new(0);
        let sets = Vec::new();
        Self {
            lowest_permissible_slot,
            sets,
            _phantom_spec: PhantomData,
            _phantom_tree_hash: PhantomData,
        }
    }
}
impl<T: SlotData + Consts + SubsetItem<Item = I>, E: EthSpec, I> ObservedAggregates<T, E, I> {
/// Store `item` in `self` keyed at `root`.
///
/// `root` must equal `item.root::<SubsetItem>()`.
pub fn observe_item(
&mut self,
item: &T,
root_opt: Option<Hash256>,
) -> Result<ObserveOutcome, Error> {
let index = self.get_set_index(item.get_slot())?;
let root = root_opt.unwrap_or_else(|| item.root());
self.sets
.get_mut(index)
.ok_or(Error::InvalidSetIndex(index))
.and_then(|set| set.observe_item(item, root))
}
/// Check if `item` is a non-strict subset of any of the already observed aggregates for
/// the given root and slot.
///
/// `root` must equal `item.root::<SubsetItem>()`.
#[allow(clippy::wrong_self_convention)]
pub fn is_known_subset(&mut self, item: &T, root: Hash256) -> Result<bool, Error> {
let index = self.get_set_index(item.get_slot())?;
self.sets
.get(index)
.ok_or(Error::InvalidSetIndex(index))
.and_then(|set| set.is_known_subset(item, root))
}
/// The maximum number of slots that items are stored for.
fn max_capacity(&self) -> u64 {
// We add `2` in order to account for one slot either side of the range due to
// `MAXIMUM_GOSSIP_CLOCK_DISPARITY`.
(T::max_slot_capacity() + 2) as u64
}
/// Removes any items with a slot lower than `current_slot` and bars any future
/// item with a slot lower than `current_slot - SLOTS_RETAINED`.
pub fn prune(&mut self, current_slot: Slot) {
let lowest_permissible_slot = current_slot.saturating_sub(self.max_capacity() - 1);
self.sets.retain(|set| set.slot >= lowest_permissible_slot);
self.lowest_permissible_slot = lowest_permissible_slot;
}
/// Returns the index of `self.set` that matches `slot`.
///
/// If there is no existing set for this slot one will be created. If `self.sets.len() >=
/// Self::max_capacity()`, the set with the lowest slot will be replaced.
fn get_set_index(&mut self, slot: Slot) -> Result<usize, Error> {
let lowest_permissible_slot = self.lowest_permissible_slot;
if slot < lowest_permissible_slot {
return Err(Error::SlotTooLow {
slot,
lowest_permissible_slot,
});
}
// Prune the pool if this item indicates that the current slot has advanced.
if lowest_permissible_slot + self.max_capacity() < slot + 1 {
self.prune(slot)
}
if let Some(index) = self.sets.iter().position(|set| set.slot == slot) {
return Ok(index);
}
// To avoid re-allocations, try and determine a rough initial capacity for the new set
// by obtaining the mean size of all items in earlier epoch.
let (count, sum) = self
.sets
.iter()
// Only include slots that are less than the given slot in the average. This should
// generally avoid including recent slots that are still "filling up".
.filter(|set| set.slot < slot)
.map(|set| set.len())
.fold((0, 0), |(count, sum), len| (count + 1, sum + len));
// If we are unable to determine an average, just use the `self.default_per_slot_capacity`.
let initial_capacity = sum
.checked_div(count)
.unwrap_or(T::DEFAULT_PER_SLOT_CAPACITY);
if self.sets.len() < self.max_capacity() as usize || self.sets.is_empty() {
let index = self.sets.len();
self.sets.push(SlotHashSet::new(
slot,
initial_capacity,
T::max_per_slot_capacity(),
));
return Ok(index);
}
let index = self
.sets
.iter()
.enumerate()
.min_by_key(|(_i, set)| set.slot)
.map(|(i, _set)| i)
.expect("sets cannot be empty due to previous .is_empty() check");
self.sets[index] = SlotHashSet::new(slot, initial_capacity, T::max_per_slot_capacity());
Ok(index)
}
}
// Unit tests for `ObservedAggregates`. The second `cfg` means this module is only compiled when
// debug assertions are disabled.
#[cfg(test)]
#[cfg(not(debug_assertions))]
mod tests {
use super::*;
use types::{test_utils::test_random_instance, Hash256};
type E = types::MainnetEthSpec;
/// Builds a random attestation and overrides its slot and beacon block root.
fn get_attestation(slot: Slot, beacon_block_root: u64) -> Attestation<E> {
let mut a: Attestation<E> = test_random_instance();
a.data.slot = slot;
a.data.beacon_block_root = Hash256::from_low_u64_be(beacon_block_root);
a
}
/// Builds a random sync committee contribution and overrides its slot and beacon block root.
fn get_sync_contribution(slot: Slot, beacon_block_root: u64) -> SyncCommitteeContribution<E> {
let mut a: SyncCommitteeContribution<E> = test_random_instance();
a.slot = slot;
a.beacon_block_root = Hash256::from_low_u64_be(beacon_block_root);
a
}
// Generates one test module per cache type:
// - `$mod_name`: name of the generated module,
// - `$type`: the `ObservedAggregates` alias under test,
// - `$method_name`: helper that builds an item from a slot and a block-root seed.
macro_rules! test_suite {
($mod_name: ident, $type: ident, $method_name: ident) => {
#[cfg(test)]
mod $mod_name {
use super::*;
const NUM_ELEMENTS: usize = 8;
// Observes `NUM_ELEMENTS` items with distinct block roots at `slot`: each must be
// unknown and `New` on the first pass, then known and `Subset` on the second pass.
fn single_slot_test(store: &mut $type<E>, slot: Slot) {
let items = (0..NUM_ELEMENTS as u64)
.map(|i| $method_name(slot, i))
.collect::<Vec<_>>();
for a in &items {
assert_eq!(
store.is_known_subset(a, a.root()),
Ok(false),
"should indicate an unknown attestation is unknown"
);
assert_eq!(
store.observe_item(a, None),
Ok(ObserveOutcome::New),
"should observe new attestation"
);
}
for a in &items {
assert_eq!(
store.is_known_subset(a, a.root()),
Ok(true),
"should indicate a known attestation is known"
);
assert_eq!(
store.observe_item(a, Some(a.root())),
Ok(ObserveOutcome::Subset),
"should acknowledge an existing attestation"
);
}
}
// A single slot yields exactly one set holding `NUM_ELEMENTS` roots.
#[test]
fn single_slot() {
let mut store = $type::default();
single_slot_test(&mut store, Slot::new(0));
assert_eq!(store.sets.len(), 1, "should have a single set stored");
assert_eq!(
store.sets[0].len(),
NUM_ELEMENTS,
"set should have NUM_ELEMENTS elements"
);
}
// Walks through three times `max_capacity` consecutive slots and checks that the
// store grows up to `max_capacity` sets and then only keeps the most recent slots.
#[test]
fn mulitple_contiguous_slots() {
let mut store = $type::default();
let max_cap = store.max_capacity();
for i in 0..max_cap * 3 {
let slot = Slot::new(i);
single_slot_test(&mut store, slot);
/*
* Ensure that the number of sets is correct.
*/
if i < max_cap {
assert_eq!(
store.sets.len(),
i as usize + 1,
"should have a {} sets stored",
i + 1
);
} else {
assert_eq!(
store.sets.len(),
max_cap as usize,
"should have max_capacity sets stored"
);
}
/*
* Ensure that each set contains the correct number of elements.
*/
for set in &store.sets[..] {
assert_eq!(
set.len(),
NUM_ELEMENTS,
"each store should have NUM_ELEMENTS elements"
)
}
/*
* Ensure that all the sets have the expected slots
*/
let mut store_slots =
store.sets.iter().map(|set| set.slot).collect::<Vec<_>>();
assert!(
store_slots.len() <= store.max_capacity() as usize,
"store size should not exceed max"
);
store_slots.sort_unstable();
let expected_slots = (i.saturating_sub(max_cap - 1)..=i)
.map(Slot::new)
.collect::<Vec<_>>();
assert_eq!(expected_slots, store_slots, "should have expected slots");
}
}
// Same walk as above but with some slots skipped; the retained slots must be exactly
// the non-skipped slots between the lowest permissible slot and the current slot.
#[test]
fn mulitple_non_contiguous_slots() {
let mut store = $type::default();
let max_cap = store.max_capacity();
let to_skip = vec![1_u64, 2, 3, 5, 6, 29, 30, 31, 32, 64];
let slots = (0..max_cap * 3)
.into_iter()
.filter(|i| !to_skip.contains(i))
.collect::<Vec<_>>();
for &i in &slots {
// `slots` has already been filtered above, so this guard never triggers.
if to_skip.contains(&i) {
continue;
}
let slot = Slot::from(i);
single_slot_test(&mut store, slot);
/*
* Ensure that each set contains the correct number of elements.
*/
for set in &store.sets[..] {
assert_eq!(
set.len(),
NUM_ELEMENTS,
"each store should have NUM_ELEMENTS elements"
)
}
/*
* Ensure that all the sets have the expected slots
*/
let mut store_slots =
store.sets.iter().map(|set| set.slot).collect::<Vec<_>>();
store_slots.sort_unstable();
assert!(
store_slots.len() <= store.max_capacity() as usize,
"store size should not exceed max"
);
let lowest = store.lowest_permissible_slot.as_u64();
let highest = slot.as_u64();
let expected_slots = (lowest..=highest)
.filter(|i| !to_skip.contains(i))
.map(Slot::new)
.collect::<Vec<_>>();
assert_eq!(
expected_slots,
&store_slots[..],
"should have expected slots"
);
}
}
}
};
}
// Instantiate the suite for sync committee contributions.
test_suite!(
observed_sync_aggregates,
ObservedSyncContributions,
get_sync_contribution
);
// Instantiate the suite for aggregate attestations.
test_suite!(
observed_aggregate_attestations,
ObservedAggregateAttestations,
get_attestation
);
}