make sync sim end faster by checking sync every epoch (#986)

This commit is contained in:
divma 2020-04-06 02:04:06 -05:00 committed by GitHub
parent c188227cc2
commit 4cba745df6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 88 additions and 39 deletions

View File

@ -57,17 +57,17 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
.help("Epoch delay for new beacon node to start syncing (default 50)"), .help("Epoch delay for new beacon node to start syncing (default 50)"),
) )
.arg( .arg(
Arg::with_name("sync_delay") Arg::with_name("sync_timeout")
.long("sync_delay") .long("sync_timeout")
.takes_value(true) .takes_value(true)
.help("Epoch delay for newly added beacon nodes get synced (default 10)"), .help("Number of epochs after which newly added beacon nodes must be synced (default 10)"),
) )
.arg( .arg(
Arg::with_name("strategy") Arg::with_name("strategy")
.long("strategy") .long("strategy")
.takes_value(true) .takes_value(true)
.possible_values(&["one-node", "two-nodes", "mixed", "all"]) .possible_values(&["one-node", "two-nodes", "mixed", "all"])
.help("Sync strategy to run. (default all)"), .help("Sync verification strategy to run. (default all)"),
), ),
) )
} }

View File

@ -93,13 +93,13 @@ fn run_beacon_chain_sim(matches: &ArgMatches) -> Result<(), String> {
fn run_syncing_sim(matches: &ArgMatches) -> Result<(), String> { fn run_syncing_sim(matches: &ArgMatches) -> Result<(), String> {
let initial_delay = value_t!(matches, "initial_delay", u64).unwrap_or(50); let initial_delay = value_t!(matches, "initial_delay", u64).unwrap_or(50);
let sync_delay = value_t!(matches, "sync_delay", u64).unwrap_or(10); let sync_timeout = value_t!(matches, "sync_timeout", u64).unwrap_or(10);
let speed_up_factor = value_t!(matches, "speedup", u64).unwrap_or(15); let speed_up_factor = value_t!(matches, "speedup", u64).unwrap_or(15);
let strategy = value_t!(matches, "strategy", String).unwrap_or("all".into()); let strategy = value_t!(matches, "strategy", String).unwrap_or("all".into());
println!("Syncing Simulator:"); println!("Syncing Simulator:");
println!(" initial_delay:{}", initial_delay); println!(" initial delay:{}", initial_delay);
println!(" sync delay:{}", sync_delay); println!(" sync timeout:{}", sync_timeout);
println!(" speed up factor:{}", speed_up_factor); println!(" speed up factor:{}", speed_up_factor);
println!(" strategy:{}", strategy); println!(" strategy:{}", strategy);
@ -109,7 +109,7 @@ fn run_syncing_sim(matches: &ArgMatches) -> Result<(), String> {
syncing_sim( syncing_sim(
speed_up_factor, speed_up_factor,
initial_delay, initial_delay,
sync_delay, sync_timeout,
strategy, strategy,
log_level, log_level,
log_format, log_format,
@ -119,7 +119,7 @@ fn run_syncing_sim(matches: &ArgMatches) -> Result<(), String> {
fn syncing_sim( fn syncing_sim(
speed_up_factor: u64, speed_up_factor: u64,
initial_delay: u64, initial_delay: u64,
sync_delay: u64, sync_timeout: u64,
strategy: String, strategy: String,
log_level: &str, log_level: &str,
log_format: Option<&str>, log_format: Option<&str>,
@ -171,7 +171,7 @@ fn syncing_sim(
beacon_config.clone(), beacon_config.clone(),
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
)) ))
.join(final_future) .join(final_future)
.map(|_| network) .map(|_| network)

View File

@ -1,8 +1,10 @@
use crate::checks::{epoch_delay, verify_all_finalized_at}; use crate::checks::{epoch_delay, verify_all_finalized_at};
use crate::local_network::LocalNetwork; use crate::local_network::LocalNetwork;
use futures::{Future, IntoFuture}; use futures::stream;
use futures::{Future, IntoFuture, Stream};
use node_test_rig::ClientConfig; use node_test_rig::ClientConfig;
use std::time::Duration; use std::time::Duration;
use tokio::timer::Interval;
use types::{Epoch, EthSpec}; use types::{Epoch, EthSpec};
pub fn pick_strategy<E: EthSpec>( pub fn pick_strategy<E: EthSpec>(
@ -11,7 +13,7 @@ pub fn pick_strategy<E: EthSpec>(
beacon_config: ClientConfig, beacon_config: ClientConfig,
slot_duration: Duration, slot_duration: Duration,
initial_delay: u64, initial_delay: u64,
sync_delay: u64, sync_timeout: u64,
) -> Box<dyn Future<Item = (), Error = String> + Send + 'static> { ) -> Box<dyn Future<Item = (), Error = String> + Send + 'static> {
match strategy { match strategy {
"one-node" => Box::new(verify_one_node_sync( "one-node" => Box::new(verify_one_node_sync(
@ -19,42 +21,44 @@ pub fn pick_strategy<E: EthSpec>(
beacon_config, beacon_config,
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
)), )),
"two-nodes" => Box::new(verify_two_nodes_sync( "two-nodes" => Box::new(verify_two_nodes_sync(
network, network,
beacon_config, beacon_config,
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
)), )),
"mixed" => Box::new(verify_in_between_sync( "mixed" => Box::new(verify_in_between_sync(
network, network,
beacon_config, beacon_config,
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
)), )),
"all" => Box::new(verify_syncing( "all" => Box::new(verify_syncing(
network, network,
beacon_config, beacon_config,
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
)), )),
_ => Box::new(Err("Invalid strategy".into()).into_future()), _ => Box::new(Err("Invalid strategy".into()).into_future()),
} }
} }
/// Verify one node added after `initial_delay` epochs is in sync /// Verify one node added after `initial_delay` epochs is in sync
/// after `sync_delay` epochs. /// after `sync_timeout` epochs.
pub fn verify_one_node_sync<E: EthSpec>( pub fn verify_one_node_sync<E: EthSpec>(
network: LocalNetwork<E>, network: LocalNetwork<E>,
beacon_config: ClientConfig, beacon_config: ClientConfig,
slot_duration: Duration, slot_duration: Duration,
initial_delay: u64, initial_delay: u64,
sync_delay: u64, sync_timeout: u64,
) -> impl Future<Item = (), Error = String> { ) -> impl Future<Item = (), Error = String> {
let epoch_duration = slot_duration * (E::slots_per_epoch() as u32);
let network_c = network.clone();
// Delay for `initial_delay` epochs before adding another node to start syncing // Delay for `initial_delay` epochs before adding another node to start syncing
epoch_delay( epoch_delay(
Epoch::new(initial_delay), Epoch::new(initial_delay),
@ -66,8 +70,14 @@ pub fn verify_one_node_sync<E: EthSpec>(
network.add_beacon_node(beacon_config).map(|_| network) network.add_beacon_node(beacon_config).map(|_| network)
}) })
.and_then(move |network| { .and_then(move |network| {
// Delay for `sync_delay` epochs before verifying synced state. // Check every `epoch_duration` if nodes are synced
epoch_delay(Epoch::new(sync_delay), slot_duration, E::slots_per_epoch()).map(|_| network) // limited to at most `sync_timeout` epochs
Interval::new_interval(epoch_duration)
.take(sync_timeout)
.map_err(|_| "Failed to create interval".to_string())
.take_while(move |_| check_still_syncing(&network_c))
.for_each(|_| Ok(())) // consume the stream
.map(|_| network)
}) })
.and_then(move |network| network.bootnode_epoch().map(|e| (e, network))) .and_then(move |network| network.bootnode_epoch().map(|e| (e, network)))
.and_then(move |(epoch, network)| { .and_then(move |(epoch, network)| {
@ -76,14 +86,16 @@ pub fn verify_one_node_sync<E: EthSpec>(
} }
/// Verify two nodes added after `initial_delay` epochs are in sync /// Verify two nodes added after `initial_delay` epochs are in sync
/// after `sync_delay` epochs. /// after `sync_timeout` epochs.
pub fn verify_two_nodes_sync<E: EthSpec>( pub fn verify_two_nodes_sync<E: EthSpec>(
network: LocalNetwork<E>, network: LocalNetwork<E>,
beacon_config: ClientConfig, beacon_config: ClientConfig,
slot_duration: Duration, slot_duration: Duration,
initial_delay: u64, initial_delay: u64,
sync_delay: u64, sync_timeout: u64,
) -> impl Future<Item = (), Error = String> { ) -> impl Future<Item = (), Error = String> {
let epoch_duration = slot_duration * (E::slots_per_epoch() as u32);
let network_c = network.clone();
// Delay for `initial_delay` epochs before adding another node to start syncing // Delay for `initial_delay` epochs before adding another node to start syncing
epoch_delay( epoch_delay(
Epoch::new(initial_delay), Epoch::new(initial_delay),
@ -100,8 +112,14 @@ pub fn verify_two_nodes_sync<E: EthSpec>(
}) })
}) })
.and_then(move |network| { .and_then(move |network| {
// Delay for `sync_delay` epochs before verifying synced state. // Check every `epoch_duration` if nodes are synced
epoch_delay(Epoch::new(sync_delay), slot_duration, E::slots_per_epoch()).map(|_| network) // limited to at most `sync_timeout` epochs
Interval::new_interval(epoch_duration)
.take(sync_timeout)
.map_err(|_| "Failed to create interval".to_string())
.take_while(move |_| check_still_syncing(&network_c))
.for_each(|_| Ok(())) // consume the stream
.map(|_| network)
}) })
.and_then(move |network| network.bootnode_epoch().map(|e| (e, network))) .and_then(move |network| network.bootnode_epoch().map(|e| (e, network)))
.and_then(move |(epoch, network)| { .and_then(move |(epoch, network)| {
@ -110,15 +128,17 @@ pub fn verify_two_nodes_sync<E: EthSpec>(
} }
/// Add 2 syncing nodes after `initial_delay` epochs, /// Add 2 syncing nodes after `initial_delay` epochs,
/// Add another node after `sync_delay - 5` epochs and verify all are /// Add another node after `sync_timeout - 5` epochs and verify all are
/// in sync after `sync_delay + 5` epochs. /// in sync after `sync_timeout + 5` epochs.
pub fn verify_in_between_sync<E: EthSpec>( pub fn verify_in_between_sync<E: EthSpec>(
network: LocalNetwork<E>, network: LocalNetwork<E>,
beacon_config: ClientConfig, beacon_config: ClientConfig,
slot_duration: Duration, slot_duration: Duration,
initial_delay: u64, initial_delay: u64,
sync_delay: u64, sync_timeout: u64,
) -> impl Future<Item = (), Error = String> { ) -> impl Future<Item = (), Error = String> {
let epoch_duration = slot_duration * (E::slots_per_epoch() as u32);
let network_c = network.clone();
// Delay for `initial_delay` epochs before adding another node to start syncing // Delay for `initial_delay` epochs before adding another node to start syncing
let config1 = beacon_config.clone(); let config1 = beacon_config.clone();
epoch_delay( epoch_delay(
@ -138,7 +158,7 @@ pub fn verify_in_between_sync<E: EthSpec>(
.and_then(move |network| { .and_then(move |network| {
// Delay before adding additional syncing nodes. // Delay before adding additional syncing nodes.
epoch_delay( epoch_delay(
Epoch::new(sync_delay - 5), Epoch::new(sync_timeout - 5),
slot_duration, slot_duration,
E::slots_per_epoch(), E::slots_per_epoch(),
) )
@ -149,12 +169,13 @@ pub fn verify_in_between_sync<E: EthSpec>(
network.add_beacon_node(config1.clone()).map(|_| network) network.add_beacon_node(config1.clone()).map(|_| network)
}) })
.and_then(move |network| { .and_then(move |network| {
// Delay for `sync_delay` epochs before verifying synced state. // Check every `epoch_duration` if nodes are synced
epoch_delay( // limited to at most `sync_timeout` epochs
Epoch::new(sync_delay + 5), Interval::new_interval(epoch_duration)
slot_duration, .take(sync_timeout + 5)
E::slots_per_epoch(), .map_err(|_| "Failed to create interval".to_string())
) .take_while(move |_| check_still_syncing(&network_c))
.for_each(|_| Ok(())) // consume the stream
.map(|_| network) .map(|_| network)
}) })
.and_then(move |network| network.bootnode_epoch().map(|e| (e, network))) .and_then(move |network| network.bootnode_epoch().map(|e| (e, network)))
@ -169,14 +190,14 @@ pub fn verify_syncing<E: EthSpec>(
beacon_config: ClientConfig, beacon_config: ClientConfig,
slot_duration: Duration, slot_duration: Duration,
initial_delay: u64, initial_delay: u64,
sync_delay: u64, sync_timeout: u64,
) -> impl Future<Item = (), Error = String> { ) -> impl Future<Item = (), Error = String> {
verify_one_node_sync( verify_one_node_sync(
network.clone(), network.clone(),
beacon_config.clone(), beacon_config.clone(),
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
) )
.map(|_| println!("Completed one node sync")) .map(|_| println!("Completed one node sync"))
.and_then(move |_| { .and_then(move |_| {
@ -185,7 +206,7 @@ pub fn verify_syncing<E: EthSpec>(
beacon_config.clone(), beacon_config.clone(),
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
) )
.map(|_| { .map(|_| {
println!("Completed two node sync"); println!("Completed two node sync");
@ -198,8 +219,36 @@ pub fn verify_syncing<E: EthSpec>(
beacon_config, beacon_config,
slot_duration, slot_duration,
initial_delay, initial_delay,
sync_delay, sync_timeout,
) )
.map(|_| println!("Completed in between sync")) .map(|_| println!("Completed in between sync"))
}) })
} }
pub fn check_still_syncing<E: EthSpec>(
network: &LocalNetwork<E>,
) -> impl Future<Item = bool, Error = String> {
let net = network.clone();
network
.remote_nodes()
.into_future()
// get all head epochs
.and_then(|remote_nodes| {
stream::unfold(remote_nodes.into_iter(), |mut iter| {
iter.next().map(|remote_node| {
remote_node
.http
.beacon()
.get_head()
.map(|head| head.finalized_slot.epoch(E::slots_per_epoch()))
.map(|epoch| (epoch, iter))
.map_err(|e| format!("Get head via http failed: {:?}", e))
})
})
.collect()
})
// find current epoch
.and_then(move |epochs| net.bootnode_epoch().map(|epoch| (epochs, epoch)))
.and_then(move |(epochs, epoch)| Ok(epochs.iter().any(|head_epoch| *head_epoch != epoch)))
.map_err(|e| format!("Failed syncing check: {:?}", e))
}