Enable longrunning tests to run (#19208)

* p2p/simulations: increased snapshot load timeout for debugging * swarm/network/stream: less nodes for snapshot longrunning tests * swarm/network: fixed longrunning tests * swarm/network/stream: store kademlia in bucket * swarm/network/stream: disabled healthy check in delivery tests * swarm/network/stream: longer SyncUpdateDelay for longrunning tests * swarm/network/stream: more debug output * swarm/network/stream: reduced longrunning snapshot tests to 64 nodes * swarm/network/stream: don't WaitTillHealthy in SyncerSimulation * swarm/network/stream: cleanup for PR
2019-03-05 06:54:46 -05:00 · 2019-03-05 06:54:46 -05:00 · 81ed700157
commit 81ed700157
parent 216bd2ceba
7 changed files with 32 additions and 23 deletions
--- a/p2p/simulations/network.go
+++ b/p2p/simulations/network.go
@ -840,7 +840,8 @@ func (net *Network) snapshot(addServices []string, removeServices []string) (*Sn
 	return snap, nil
 }

-var snapshotLoadTimeout = 120 * time.Second
+// longrunning tests may need a longer timeout
+var snapshotLoadTimeout = 900 * time.Second

 // Load loads a network snapshot
 func (net *Network) Load(snap *Snapshot) error {
--- a/swarm/network/stream/common_test.go
+++ b/swarm/network/stream/common_test.go
@ -134,6 +134,9 @@ func netStoreAndDeliveryWithAddr(ctx *adapters.ServiceContext, bucket *sync.Map,
 	bucket.Store(bucketKeyDB, netStore)
 	bucket.Store(bucketKeyDelivery, delivery)
 	bucket.Store(bucketKeyFileStore, fileStore)
+	// for the kademlia object, we use the global key from the simulation package,
+	// as the simulation will try to access it in the WaitTillHealthy with that key
+	bucket.Store(simulation.BucketKeyKademlia, kad)

 	cleanup := func() {
 		netStore.Close()
--- a/swarm/network/stream/delivery_test.go
+++ b/swarm/network/stream/delivery_test.go
@ -534,12 +534,6 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool)
 				return err
 			}

-			log.Debug("Waiting for kademlia")
-			// TODO this does not seem to be correct usage of the function, as the simulation may have no kademlias
-			if _, err := sim.WaitTillHealthy(ctx); err != nil {
-				return err
-			}
-
 			//get the pivot node's filestore
 			item, ok := sim.NodeItem(pivot, bucketKeyFileStore)
 			if !ok {
--- a/swarm/network/stream/snapshot_retrieval_test.go
+++ b/swarm/network/stream/snapshot_retrieval_test.go
@ -53,7 +53,7 @@ func TestFileRetrieval(t *testing.T) {
 		nodeCount = []int{16}

 		if *longrunning {
-			nodeCount = append(nodeCount, 32, 64, 128)
+			nodeCount = append(nodeCount, 32, 64)
 		} else if testutil.RaceEnabled {
 			nodeCount = []int{4}
 		}
@ -86,7 +86,7 @@ func TestRetrieval(t *testing.T) {
 		chnkCnt := []int{32}

 		if *longrunning {
-			nodeCnt = []int{16, 32, 128}
+			nodeCnt = []int{16, 32, 64}
 			chnkCnt = []int{4, 32, 256}
 		} else if testutil.RaceEnabled {
 			nodeCnt = []int{4}
@ -113,10 +113,15 @@ var retrievalSimServiceMap = map[string]simulation.ServiceFunc{
 			return nil, nil, err
 		}

+		syncUpdateDelay := 1 * time.Second
+		if *longrunning {
+			syncUpdateDelay = 3 * time.Second
+		}
+
 		r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
 			Retrieval:       RetrievalEnabled,
 			Syncing:         SyncingAutoSubscribe,
-			SyncUpdateDelay: 3 * time.Second,
+			SyncUpdateDelay: syncUpdateDelay,
 		}, nil)

 		cleanup = func() {
@ -140,7 +145,7 @@ func runFileRetrievalTest(nodeCount int) error {
 	sim := simulation.New(retrievalSimServiceMap)
 	defer sim.Close()

-	log.Info("Initializing test config")
+	log.Info("Initializing test config", "node count", nodeCount)

 	conf := &synctestConfig{}
 	//map of discover ID to indexes of chunks expected at that ID
@ -158,6 +163,8 @@ func runFileRetrievalTest(nodeCount int) error {
 	ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancelSimRun()

+	log.Info("Starting simulation")
+
 	result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
 		nodeIDs := sim.UpNodeIDs()
 		for _, n := range nodeIDs {
@ -185,6 +192,8 @@ func runFileRetrievalTest(nodeCount int) error {
 			return err
 		}

+		log.Info("network healthy, start file checks")
+
 		// File retrieval check is repeated until all uploaded files are retrieved from all nodes
 		// or until the timeout is reached.
 	REPEAT:
@ -212,6 +221,8 @@ func runFileRetrievalTest(nodeCount int) error {
 		}
 	})

+	log.Info("Simulation terminated")
+
 	if result.Error != nil {
 		return result.Error
 	}
--- a/swarm/network/stream/snapshot_sync_test.go
+++ b/swarm/network/stream/snapshot_sync_test.go
@ -94,8 +94,8 @@ func TestSyncingViaGlobalSync(t *testing.T) {
 		//if the `longrunning` flag has been provided
 		//run more test combinations
 		if *longrunning {
-			chunkCounts = []int{1, 8, 32, 256, 1024}
-			nodeCounts = []int{16, 32, 64, 128, 256}
+			chunkCounts = []int{64, 128}
+			nodeCounts = []int{32, 64}
 		}

 		for _, chunkCount := range chunkCounts {
--- a/swarm/network/stream/streamer_test.go
+++ b/swarm/network/stream/streamer_test.go
@ -1188,12 +1188,13 @@ func TestGetSubscriptionsRPC(t *testing.T) {

 	// arbitrarily set to 4
 	nodeCount := 4
+	// set the syncUpdateDelay for sync registrations to start
+	syncUpdateDelay := 200 * time.Millisecond
 	// run with more nodes if `longrunning` flag is set
 	if *longrunning {
 		nodeCount = 64
+		syncUpdateDelay = 10 * time.Second
 	}
-	// set the syncUpdateDelay for sync registrations to start
-	syncUpdateDelay := 200 * time.Millisecond
 	// holds the msg code for SubscribeMsg
 	var subscribeMsgCode uint64
 	var ok bool
@ -1241,7 +1242,7 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 	})
 	defer sim.Close()

-	ctx, cancelSimRun := context.WithTimeout(context.Background(), 1*time.Minute)
+	ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancelSimRun()

 	// upload a snapshot
@ -1267,6 +1268,9 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 	go func() {
 		//for long running sims, waiting 1 sec will not be enough
 		waitDuration := time.Duration(nodeCount/16) * time.Second
+		if *longrunning {
+			waitDuration = syncUpdateDelay
+		}
 		for {
 			select {
 			case <-ctx.Done():
@ -1328,11 +1332,11 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 					}
 				}
 			}
+			log.Debug("All node streams counted", "realCount", realCount)
 		}
-		// every node is mutually subscribed to each other, so the actual count is half of it
 		emc := expectedMsgCount.count()
-		if realCount/2 != emc {
-			return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount/2, emc)
+		if realCount != emc {
+			return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount, emc)
 		}
 		return nil
 	})
--- a/swarm/network/stream/syncer_test.go
+++ b/swarm/network/stream/syncer_test.go
@ -173,10 +173,6 @@ func testSyncBetweenNodes(t *testing.T, nodes, chunkCount int, skipCheck bool, p
 			}
 		}
 		// here we distribute chunks of a random file into stores 1...nodes
-		if _, err := sim.WaitTillHealthy(ctx); err != nil {
-			return err
-		}
-
 		// collect hashes in po 1 bin for each node
 		hashes := make([][]storage.Address, nodes)
 		totalHashes := 0