Enable longrunning tests to run (#19208)

* p2p/simulations: increased snapshot load timeout for debugging

* swarm/network/stream: fewer nodes for snapshot longrunning tests

* swarm/network: fixed longrunning tests

* swarm/network/stream: store kademlia in bucket

* swarm/network/stream: disabled healthy check in delivery tests

* swarm/network/stream: longer SyncUpdateDelay for longrunning tests

* swarm/network/stream: more debug output

* swarm/network/stream: reduced longrunning snapshot tests to 64 nodes

* swarm/network/stream: don't WaitTillHealthy in SyncerSimulation

* swarm/network/stream: cleanup for PR
This commit is contained in:
holisticode 2019-03-05 06:54:46 -05:00 committed by Viktor Trón
parent 216bd2ceba
commit 81ed700157
7 changed files with 32 additions and 23 deletions

View File

@ -840,7 +840,8 @@ func (net *Network) snapshot(addServices []string, removeServices []string) (*Sn
return snap, nil
}
var snapshotLoadTimeout = 120 * time.Second
// longrunning tests may need a longer timeout
var snapshotLoadTimeout = 900 * time.Second
// Load loads a network snapshot
func (net *Network) Load(snap *Snapshot) error {

View File

@ -134,6 +134,9 @@ func netStoreAndDeliveryWithAddr(ctx *adapters.ServiceContext, bucket *sync.Map,
bucket.Store(bucketKeyDB, netStore)
bucket.Store(bucketKeyDelivery, delivery)
bucket.Store(bucketKeyFileStore, fileStore)
// for the kademlia object, we use the global key from the simulation package,
// because the simulation looks it up under that key in WaitTillHealthy
bucket.Store(simulation.BucketKeyKademlia, kad)
cleanup := func() {
netStore.Close()

View File

@ -534,12 +534,6 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool)
return err
}
log.Debug("Waiting for kademlia")
// TODO this does not seem to be correct usage of the function, as the simulation may have no kademlias
if _, err := sim.WaitTillHealthy(ctx); err != nil {
return err
}
//get the pivot node's filestore
item, ok := sim.NodeItem(pivot, bucketKeyFileStore)
if !ok {

View File

@ -53,7 +53,7 @@ func TestFileRetrieval(t *testing.T) {
nodeCount = []int{16}
if *longrunning {
nodeCount = append(nodeCount, 32, 64, 128)
nodeCount = append(nodeCount, 32, 64)
} else if testutil.RaceEnabled {
nodeCount = []int{4}
}
@ -86,7 +86,7 @@ func TestRetrieval(t *testing.T) {
chnkCnt := []int{32}
if *longrunning {
nodeCnt = []int{16, 32, 128}
nodeCnt = []int{16, 32, 64}
chnkCnt = []int{4, 32, 256}
} else if testutil.RaceEnabled {
nodeCnt = []int{4}
@ -113,10 +113,15 @@ var retrievalSimServiceMap = map[string]simulation.ServiceFunc{
return nil, nil, err
}
syncUpdateDelay := 1 * time.Second
if *longrunning {
syncUpdateDelay = 3 * time.Second
}
r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
Retrieval: RetrievalEnabled,
Syncing: SyncingAutoSubscribe,
SyncUpdateDelay: 3 * time.Second,
SyncUpdateDelay: syncUpdateDelay,
}, nil)
cleanup = func() {
@ -140,7 +145,7 @@ func runFileRetrievalTest(nodeCount int) error {
sim := simulation.New(retrievalSimServiceMap)
defer sim.Close()
log.Info("Initializing test config")
log.Info("Initializing test config", "node count", nodeCount)
conf := &synctestConfig{}
//map of discover ID to indexes of chunks expected at that ID
@ -158,6 +163,8 @@ func runFileRetrievalTest(nodeCount int) error {
ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancelSimRun()
log.Info("Starting simulation")
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
nodeIDs := sim.UpNodeIDs()
for _, n := range nodeIDs {
@ -185,6 +192,8 @@ func runFileRetrievalTest(nodeCount int) error {
return err
}
log.Info("network healthy, start file checks")
// File retrieval check is repeated until all uploaded files are retrieved from all nodes
// or until the timeout is reached.
REPEAT:
@ -212,6 +221,8 @@ func runFileRetrievalTest(nodeCount int) error {
}
})
log.Info("Simulation terminated")
if result.Error != nil {
return result.Error
}

View File

@ -94,8 +94,8 @@ func TestSyncingViaGlobalSync(t *testing.T) {
//if the `longrunning` flag has been provided
//run more test combinations
if *longrunning {
chunkCounts = []int{1, 8, 32, 256, 1024}
nodeCounts = []int{16, 32, 64, 128, 256}
chunkCounts = []int{64, 128}
nodeCounts = []int{32, 64}
}
for _, chunkCount := range chunkCounts {

View File

@ -1188,12 +1188,13 @@ func TestGetSubscriptionsRPC(t *testing.T) {
// arbitrarily set to 4
nodeCount := 4
// set the syncUpdateDelay for sync registrations to start
syncUpdateDelay := 200 * time.Millisecond
// run with more nodes if `longrunning` flag is set
if *longrunning {
nodeCount = 64
syncUpdateDelay = 10 * time.Second
}
// set the syncUpdateDelay for sync registrations to start
syncUpdateDelay := 200 * time.Millisecond
// holds the msg code for SubscribeMsg
var subscribeMsgCode uint64
var ok bool
@ -1241,7 +1242,7 @@ func TestGetSubscriptionsRPC(t *testing.T) {
})
defer sim.Close()
ctx, cancelSimRun := context.WithTimeout(context.Background(), 1*time.Minute)
ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancelSimRun()
// upload a snapshot
@ -1267,6 +1268,9 @@ func TestGetSubscriptionsRPC(t *testing.T) {
go func() {
// for long-running sims, waiting 1 sec will not be enough
waitDuration := time.Duration(nodeCount/16) * time.Second
if *longrunning {
waitDuration = syncUpdateDelay
}
for {
select {
case <-ctx.Done():
@ -1328,11 +1332,11 @@ func TestGetSubscriptionsRPC(t *testing.T) {
}
}
}
log.Debug("All node streams counted", "realCount", realCount)
}
// every node is mutually subscribed to each other, so the actual count is half of it
emc := expectedMsgCount.count()
if realCount/2 != emc {
return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount/2, emc)
if realCount != emc {
return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount, emc)
}
return nil
})

View File

@ -173,10 +173,6 @@ func testSyncBetweenNodes(t *testing.T, nodes, chunkCount int, skipCheck bool, p
}
}
// here we distribute chunks of a random file into stores 1...nodes
if _, err := sim.WaitTillHealthy(ctx); err != nil {
return err
}
// collect hashes in po 1 bin for each node
hashes := make([][]storage.Address, nodes)
totalHashes := 0