Refactor discovery queries (#2518)

## Issue Addressed

N/A

## Proposed Changes

Refactor discovery queries such that only `QueryType::Subnet` queries are queued for grouping. `QueryType::FindPeers` is always made regardless of the number of active `Subnet` queries (max = 2). This prevents `FindPeers` queries from getting starved if `Subnet` queries start queuing up. 

Also removes `GroupedQueryType` struct and use `QueryType` for all queuing and processing of discovery requests.

## Additional Info

Currently, no distinction is made between subnet discovery queries for attestation and sync committee subnets when grouping queries. Potentially we could prioritise attestation queries over sync committee queries in the future.
This commit is contained in:
Pawan Dhananjay 2021-08-24 00:12:13 +00:00
parent 90d5ab1566
commit 02fd54bea7

View File

@ -53,8 +53,11 @@ pub const TARGET_SUBNET_PEERS: usize = config::MESH_N_LOW;
const TARGET_PEERS_FOR_GROUPED_QUERY: usize = 6; const TARGET_PEERS_FOR_GROUPED_QUERY: usize = 6;
/// Number of times to attempt a discovery request. /// Number of times to attempt a discovery request.
const MAX_DISCOVERY_RETRY: usize = 3; const MAX_DISCOVERY_RETRY: usize = 3;
/// The maximum number of concurrent discovery queries. /// The maximum number of concurrent subnet discovery queries.
const MAX_CONCURRENT_QUERIES: usize = 2; /// Note: we always allow a single FindPeers query, so we would be
/// running a maximum of `MAX_CONCURRENT_SUBNET_QUERIES + 1`
/// discovery queries at a time.
const MAX_CONCURRENT_SUBNET_QUERIES: usize = 2;
/// The max number of subnets to search for in a single subnet discovery query. /// The max number of subnets to search for in a single subnet discovery query.
const MAX_SUBNETS_IN_QUERY: usize = 3; const MAX_SUBNETS_IN_QUERY: usize = 3;
/// The number of closest peers to search for when doing a regular peer search. /// The number of closest peers to search for when doing a regular peer search.
@ -81,6 +84,19 @@ struct SubnetQuery {
retries: usize, retries: usize,
} }
impl SubnetQuery {
/// Returns true if this query has expired.
pub fn expired(&self) -> bool {
if let Some(ttl) = self.min_ttl {
ttl < Instant::now()
}
// `None` corresponds to long lived subnet discovery requests.
else {
false
}
}
}
impl std::fmt::Debug for SubnetQuery { impl std::fmt::Debug for SubnetQuery {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let min_ttl_secs = self let min_ttl_secs = self
@ -97,37 +113,16 @@ impl std::fmt::Debug for SubnetQuery {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
enum QueryType { enum QueryType {
/// We are searching for subnet peers. /// We are searching for subnet peers.
Subnet(SubnetQuery),
/// We are searching for more peers without ENR or time constraints.
FindPeers,
}
#[derive(Debug, Clone, PartialEq)]
enum GroupedQueryType {
/// We are searching for peers on one of a few subnets.
Subnet(Vec<SubnetQuery>), Subnet(Vec<SubnetQuery>),
/// We are searching for more peers without ENR or time constraints. /// We are searching for more peers without ENR or time constraints.
FindPeers, FindPeers,
} }
impl QueryType {
/// Returns true if this query has expired.
pub fn expired(&self) -> bool {
match self {
Self::FindPeers => false,
Self::Subnet(subnet_query) => {
if let Some(ttl) = subnet_query.min_ttl {
ttl < Instant::now()
} else {
true
}
}
}
}
}
/// The result of a query. /// The result of a query.
struct QueryResult(GroupedQueryType, Result<Vec<Enr>, discv5::QueryError>); struct QueryResult {
query_type: QueryType,
result: Result<Vec<Enr>, discv5::QueryError>,
}
// Awaiting the event stream future // Awaiting the event stream future
enum EventStream { enum EventStream {
@ -169,8 +164,8 @@ pub struct Discovery<TSpec: EthSpec> {
/// a time, regardless of the query concurrency. /// a time, regardless of the query concurrency.
find_peer_active: bool, find_peer_active: bool,
/// A queue of discovery queries to be processed. /// A queue of subnet queries to be processed.
queued_queries: VecDeque<QueryType>, queued_queries: VecDeque<SubnetQuery>,
/// Active discovery queries. /// Active discovery queries.
active_queries: FuturesUnordered<std::pin::Pin<Box<dyn Future<Output = QueryResult> + Send>>>, active_queries: FuturesUnordered<std::pin::Pin<Box<dyn Future<Output = QueryResult> + Send>>>,
@ -328,15 +323,12 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
if !self.started || self.find_peer_active { if !self.started || self.find_peer_active {
return; return;
} }
// Immediately start a FindNode query
// If there is not already a find peer's query queued, add one debug!(self.log, "Starting a peer discovery request");
let query = QueryType::FindPeers; self.find_peer_active = true;
if !self.queued_queries.contains(&query) { self.start_query(QueryType::FindPeers, FIND_NODE_QUERY_CLOSEST_PEERS, |_| {
debug!(self.log, "Queuing a peer discovery request"); true
self.queued_queries.push_back(query); });
// update the metrics
metrics::set_gauge(&metrics::DISCOVERY_QUEUE, self.queued_queries.len() as i64);
}
} }
/// Processes a request to search for more peers on a subnet. /// Processes a request to search for more peers on a subnet.
@ -347,7 +339,7 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
} }
trace!( trace!(
self.log, self.log,
"Making discovery query for subnets"; "Starting discovery query for subnets";
"subnets" => ?subnets_to_discover.iter().map(|s| s.subnet).collect::<Vec<_>>() "subnets" => ?subnets_to_discover.iter().map(|s| s.subnet).collect::<Vec<_>>()
); );
for subnet in subnets_to_discover { for subnet in subnets_to_discover {
@ -612,8 +604,7 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
// Search through any queued requests and update the timeout if a query for this subnet // Search through any queued requests and update the timeout if a query for this subnet
// already exists // already exists
let mut found = false; let mut found = false;
for query in self.queued_queries.iter_mut() { for subnet_query in self.queued_queries.iter_mut() {
if let QueryType::Subnet(ref mut subnet_query) = query {
if subnet_query.subnet == subnet { if subnet_query.subnet == subnet {
if subnet_query.min_ttl < min_ttl { if subnet_query.min_ttl < min_ttl {
subnet_query.min_ttl = min_ttl; subnet_query.min_ttl = min_ttl;
@ -625,17 +616,14 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
break; break;
} }
} }
}
if !found { if !found {
// Set up the query and add it to the queue // update the metrics and insert into the queue.
let query = QueryType::Subnet(SubnetQuery { trace!(self.log, "Queuing subnet query"; "subnet" => ?subnet, "retries" => retries);
self.queued_queries.push_back(SubnetQuery {
subnet, subnet,
min_ttl, min_ttl,
retries, retries,
}); });
// update the metrics and insert into the queue.
trace!(self.log, "Queuing subnet query"; "subnet" => ?subnet, "retries" => retries);
self.queued_queries.push_back(query);
metrics::set_gauge(&metrics::DISCOVERY_QUEUE, self.queued_queries.len() as i64); metrics::set_gauge(&metrics::DISCOVERY_QUEUE, self.queued_queries.len() as i64);
} }
} }
@ -643,8 +631,8 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
/// Consume the discovery queue and initiate queries when applicable. /// Consume the discovery queue and initiate queries when applicable.
/// ///
/// This also sanitizes the queue removing out-dated queries. /// This also sanitizes the queue removing out-dated queries.
/// Returns `true` if any of the queued queries is processed and a discovery /// Returns `true` if any of the queued queries is processed and a subnet discovery
/// query (Subnet or FindPeers) is started. /// query is started.
fn process_queue(&mut self) -> bool { fn process_queue(&mut self) -> bool {
// Sanitize the queue, removing any out-dated subnet queries // Sanitize the queue, removing any out-dated subnet queries
self.queued_queries.retain(|query| !query.expired()); self.queued_queries.retain(|query| !query.expired());
@ -655,36 +643,13 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
// Check that we are within our query concurrency limit // Check that we are within our query concurrency limit
while !self.at_capacity() && !self.queued_queries.is_empty() { while !self.at_capacity() && !self.queued_queries.is_empty() {
// consume and process the query queue // consume and process the query queue
match self.queued_queries.pop_front() { if let Some(subnet_query) = self.queued_queries.pop_front() {
Some(QueryType::FindPeers) => {
// Only start a find peers query if it is the last message in the queue.
// We want to prioritize subnet queries, so we don't miss attestations.
if self.queued_queries.is_empty() {
// This is a regular request to find additional peers
debug!(self.log, "Discovery query started");
self.find_peer_active = true;
self.start_query(
GroupedQueryType::FindPeers,
FIND_NODE_QUERY_CLOSEST_PEERS,
|_| true,
);
processed = true;
} else {
self.queued_queries.push_back(QueryType::FindPeers);
}
}
Some(QueryType::Subnet(subnet_query)) => {
subnet_queries.push(subnet_query); subnet_queries.push(subnet_query);
// We want to start a grouped subnet query if: // We want to start a grouped subnet query if:
// 1. We've grouped MAX_SUBNETS_IN_QUERY subnets together. // 1. We've grouped MAX_SUBNETS_IN_QUERY subnets together.
// 2. There are no more messages in the queue. // 2. There are no more messages in the queue.
// 3. There is exactly one message in the queue and it is FindPeers. if subnet_queries.len() == MAX_SUBNETS_IN_QUERY || self.queued_queries.is_empty() {
if subnet_queries.len() == MAX_SUBNETS_IN_QUERY
|| self.queued_queries.is_empty()
|| (self.queued_queries.front() == Some(&QueryType::FindPeers)
&& self.queued_queries.len() == 1)
{
// This query is for searching for peers of a particular subnet // This query is for searching for peers of a particular subnet
// Drain subnet_queries so we can re-use it as we continue to process the queue // Drain subnet_queries so we can re-use it as we continue to process the queue
let grouped_queries: Vec<SubnetQuery> = subnet_queries.drain(..).collect(); let grouped_queries: Vec<SubnetQuery> = subnet_queries.drain(..).collect();
@ -692,8 +657,6 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
processed = true; processed = true;
} }
} }
None => {} // Queue is empty
}
} }
// Update the queue metric // Update the queue metric
metrics::set_gauge(&metrics::DISCOVERY_QUEUE, self.queued_queries.len() as i64); metrics::set_gauge(&metrics::DISCOVERY_QUEUE, self.queued_queries.len() as i64);
@ -701,9 +664,12 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
} }
// Returns a boolean indicating if we are currently processing the maximum number of // Returns a boolean indicating if we are currently processing the maximum number of
// concurrent queries or not. // concurrent subnet queries or not.
fn at_capacity(&self) -> bool { fn at_capacity(&self) -> bool {
self.active_queries.len() >= MAX_CONCURRENT_QUERIES self.active_queries
.len()
.saturating_sub(self.find_peer_active as usize) // We only count active subnet queries
>= MAX_CONCURRENT_SUBNET_QUERIES
} }
/// Runs a discovery request for a given group of subnets. /// Runs a discovery request for a given group of subnets.
@ -754,7 +720,7 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
"subnets" => ?filtered_subnet_queries, "subnets" => ?filtered_subnet_queries,
); );
self.start_query( self.start_query(
GroupedQueryType::Subnet(filtered_subnet_queries), QueryType::Subnet(filtered_subnet_queries),
TARGET_PEERS_FOR_GROUPED_QUERY, TARGET_PEERS_FOR_GROUPED_QUERY,
subnet_predicate, subnet_predicate,
); );
@ -768,14 +734,14 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
/// ENR. /// ENR.
fn start_query( fn start_query(
&mut self, &mut self,
grouped_query: GroupedQueryType, query: QueryType,
target_peers: usize, target_peers: usize,
additional_predicate: impl Fn(&Enr) -> bool + Send + 'static, additional_predicate: impl Fn(&Enr) -> bool + Send + 'static,
) { ) {
// Make sure there are subnet queries included // Make sure there are subnet queries included
let contains_queries = match &grouped_query { let contains_queries = match &query {
GroupedQueryType::Subnet(queries) => !queries.is_empty(), QueryType::Subnet(queries) => !queries.is_empty(),
GroupedQueryType::FindPeers => true, QueryType::FindPeers => true,
}; };
if !contains_queries { if !contains_queries {
@ -813,7 +779,10 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
let query_future = self let query_future = self
.discv5 .discv5
.find_node_predicate(random_node, predicate, target_peers) .find_node_predicate(random_node, predicate, target_peers)
.map(|v| QueryResult(grouped_query, v)); .map(|v| QueryResult {
query_type: query,
result: v,
});
// Add the future to active queries, to be executed. // Add the future to active queries, to be executed.
self.active_queries.push(Box::pin(query_future)); self.active_queries.push(Box::pin(query_future));
@ -822,12 +791,12 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
/// Process the completed QueryResult returned from discv5. /// Process the completed QueryResult returned from discv5.
fn process_completed_queries( fn process_completed_queries(
&mut self, &mut self,
query_result: QueryResult, query: QueryResult,
) -> Option<HashMap<PeerId, Option<Instant>>> { ) -> Option<HashMap<PeerId, Option<Instant>>> {
match query_result.0 { match query.query_type {
GroupedQueryType::FindPeers => { QueryType::FindPeers => {
self.find_peer_active = false; self.find_peer_active = false;
match query_result.1 { match query.result {
Ok(r) if r.is_empty() => { Ok(r) if r.is_empty() => {
debug!(self.log, "Discovery query yielded no results."); debug!(self.log, "Discovery query yielded no results.");
} }
@ -846,10 +815,10 @@ impl<TSpec: EthSpec> Discovery<TSpec> {
} }
} }
} }
GroupedQueryType::Subnet(queries) => { QueryType::Subnet(queries) => {
let subnets_searched_for: Vec<Subnet> = let subnets_searched_for: Vec<Subnet> =
queries.iter().map(|query| query.subnet).collect(); queries.iter().map(|query| query.subnet).collect();
match query_result.1 { match query.result {
Ok(r) if r.is_empty() => { Ok(r) if r.is_empty() => {
debug!(self.log, "Grouped subnet discovery query yielded no results."; "subnets_searched_for" => ?subnets_searched_for); debug!(self.log, "Grouped subnet discovery query yielded no results."; "subnets_searched_for" => ?subnets_searched_for);
queries.iter().for_each(|query| { queries.iter().for_each(|query| {
@ -1144,10 +1113,7 @@ mod tests {
subnet_query.min_ttl, subnet_query.min_ttl,
subnet_query.retries, subnet_query.retries,
); );
assert_eq!( assert_eq!(discovery.queued_queries.back(), Some(&subnet_query));
discovery.queued_queries.back(),
Some(&QueryType::Subnet(subnet_query.clone()))
);
// New query should replace old query // New query should replace old query
subnet_query.min_ttl = Some(now + Duration::from_secs(1)); subnet_query.min_ttl = Some(now + Duration::from_secs(1));
@ -1158,7 +1124,7 @@ mod tests {
assert_eq!(discovery.queued_queries.len(), 1); assert_eq!(discovery.queued_queries.len(), 1);
assert_eq!( assert_eq!(
discovery.queued_queries.pop_back(), discovery.queued_queries.pop_back(),
Some(QueryType::Subnet(subnet_query.clone())) Some(subnet_query.clone())
); );
// Retries > MAX_DISCOVERY_RETRY must return immediately without adding // Retries > MAX_DISCOVERY_RETRY must return immediately without adding
@ -1172,39 +1138,6 @@ mod tests {
assert_eq!(discovery.queued_queries.len(), 0); assert_eq!(discovery.queued_queries.len(), 0);
} }
#[tokio::test]
async fn test_process_queue() {
let mut discovery = build_discovery().await;
// FindPeers query is processed if there is no subnet query
discovery.queued_queries.push_back(QueryType::FindPeers);
assert!(discovery.process_queue());
let now = Instant::now();
let subnet_query = SubnetQuery {
subnet: Subnet::Attestation(SubnetId::new(1)),
min_ttl: Some(now + Duration::from_secs(10)),
retries: 0,
};
// Refresh active queries
discovery.active_queries = Default::default();
// SubnetQuery is processed if it's the only queued query
discovery
.queued_queries
.push_back(QueryType::Subnet(subnet_query.clone()));
assert!(discovery.process_queue());
// SubnetQuery is processed if it's there is also 1 queued discovery query
discovery.queued_queries.push_back(QueryType::FindPeers);
discovery
.queued_queries
.push_back(QueryType::Subnet(subnet_query));
// Process Subnet query and FindPeers afterwards.
assert!(discovery.process_queue());
}
fn make_enr(subnet_ids: Vec<usize>) -> Enr { fn make_enr(subnet_ids: Vec<usize>) -> Enr {
let mut builder = EnrBuilder::new("v4"); let mut builder = EnrBuilder::new("v4");
let keypair = libp2p::identity::Keypair::generate_secp256k1(); let keypair = libp2p::identity::Keypair::generate_secp256k1();
@ -1227,7 +1160,7 @@ mod tests {
let instant1 = Some(now + Duration::from_secs(10)); let instant1 = Some(now + Duration::from_secs(10));
let instant2 = Some(now + Duration::from_secs(5)); let instant2 = Some(now + Duration::from_secs(5));
let query = GroupedQueryType::Subnet(vec![ let query = QueryType::Subnet(vec![
SubnetQuery { SubnetQuery {
subnet: Subnet::Attestation(SubnetId::new(1)), subnet: Subnet::Attestation(SubnetId::new(1)),
min_ttl: instant1, min_ttl: instant1,
@ -1248,7 +1181,10 @@ mod tests {
let enrs: Vec<Enr> = vec![enr1.clone(), enr2, enr3]; let enrs: Vec<Enr> = vec![enr1.clone(), enr2, enr3];
let results = discovery let results = discovery
.process_completed_queries(QueryResult(query, Ok(enrs))) .process_completed_queries(QueryResult {
query_type: query,
result: Ok(enrs),
})
.unwrap(); .unwrap();
// enr1 and enr2 are required peers based on the requested subnet ids // enr1 and enr2 are required peers based on the requested subnet ids