From 81ed70015704737b3e5329314a62a3a5aaa74e8a Mon Sep 17 00:00:00 2001 From: holisticode Date: Tue, 5 Mar 2019 06:54:46 -0500 Subject: [PATCH] Enable longrunning tests to run (#19208) * p2p/simulations: increased snapshot load timeout for debugging * swarm/network/stream: less nodes for snapshot longrunning tests * swarm/network: fixed longrunning tests * swarm/network/stream: store kademlia in bucket * swarm/network/stream: disabled healthy check in delivery tests * swarm/network/stream: longer SyncUpdateDelay for longrunning tests * swarm/network/stream: more debug output * swarm/network/stream: reduced longrunning snapshot tests to 64 nodes * swarm/network/stream: don't WaitTillHealthy in SyncerSimulation * swarm/network/stream: cleanup for PR --- p2p/simulations/network.go | 3 ++- swarm/network/stream/common_test.go | 3 +++ swarm/network/stream/delivery_test.go | 6 ------ .../network/stream/snapshot_retrieval_test.go | 19 +++++++++++++++---- swarm/network/stream/snapshot_sync_test.go | 4 ++-- swarm/network/stream/streamer_test.go | 16 ++++++++++------ swarm/network/stream/syncer_test.go | 4 ---- 7 files changed, 32 insertions(+), 23 deletions(-) diff --git a/p2p/simulations/network.go b/p2p/simulations/network.go index 483d4ab87..f03c953e8 100644 --- a/p2p/simulations/network.go +++ b/p2p/simulations/network.go @@ -840,7 +840,8 @@ func (net *Network) snapshot(addServices []string, removeServices []string) (*Sn return snap, nil } -var snapshotLoadTimeout = 120 * time.Second +// longrunning tests may need a longer timeout +var snapshotLoadTimeout = 900 * time.Second // Load loads a network snapshot func (net *Network) Load(snap *Snapshot) error { diff --git a/swarm/network/stream/common_test.go b/swarm/network/stream/common_test.go index afd08d275..ec29e16e3 100644 --- a/swarm/network/stream/common_test.go +++ b/swarm/network/stream/common_test.go @@ -134,6 +134,9 @@ func netStoreAndDeliveryWithAddr(ctx *adapters.ServiceContext, bucket *sync.Map, bucket.Store(bucketKeyDB, netStore) bucket.Store(bucketKeyDelivery, delivery) bucket.Store(bucketKeyFileStore, fileStore) + // for the kademlia object, we use the global key from the simulation package, + // as the simulation will try to access it in the WaitTillHealthy with that key + bucket.Store(simulation.BucketKeyKademlia, kad) cleanup := func() { netStore.Close() diff --git a/swarm/network/stream/delivery_test.go b/swarm/network/stream/delivery_test.go index 6ff18fbc6..50b788150 100644 --- a/swarm/network/stream/delivery_test.go +++ b/swarm/network/stream/delivery_test.go @@ -534,12 +534,6 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool) return err } - log.Debug("Waiting for kademlia") - // TODO this does not seem to be correct usage of the function, as the simulation may have no kademlias - if _, err := sim.WaitTillHealthy(ctx); err != nil { - return err - } - //get the pivot node's filestore item, ok := sim.NodeItem(pivot, bucketKeyFileStore) if !ok { diff --git a/swarm/network/stream/snapshot_retrieval_test.go b/swarm/network/stream/snapshot_retrieval_test.go index 5e24a39f5..2fdf8e9e3 100644 --- a/swarm/network/stream/snapshot_retrieval_test.go +++ b/swarm/network/stream/snapshot_retrieval_test.go @@ -53,7 +53,7 @@ func TestFileRetrieval(t *testing.T) { nodeCount = []int{16} if *longrunning { - nodeCount = append(nodeCount, 32, 64, 128) + nodeCount = append(nodeCount, 32, 64) } else if testutil.RaceEnabled { nodeCount = []int{4} } @@ -86,7 +86,7 @@ func TestRetrieval(t *testing.T) { chnkCnt := []int{32} if *longrunning { - nodeCnt = []int{16, 32, 128} + nodeCnt = []int{16, 32, 64} chnkCnt = []int{4, 32, 256} } else if testutil.RaceEnabled { nodeCnt = []int{4} @@ -113,10 +113,15 @@ var retrievalSimServiceMap = map[string]simulation.ServiceFunc{ return nil, nil, err } + syncUpdateDelay := 1 * time.Second + if *longrunning { + syncUpdateDelay = 3 * time.Second + } + r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{ Retrieval: RetrievalEnabled, Syncing: SyncingAutoSubscribe, - SyncUpdateDelay: 3 * time.Second, + SyncUpdateDelay: syncUpdateDelay, }, nil) cleanup = func() { @@ -140,7 +145,7 @@ func runFileRetrievalTest(nodeCount int) error { sim := simulation.New(retrievalSimServiceMap) defer sim.Close() - log.Info("Initializing test config") + log.Info("Initializing test config", "node count", nodeCount) conf := &synctestConfig{} //map of discover ID to indexes of chunks expected at that ID @@ -158,6 +163,8 @@ func runFileRetrievalTest(nodeCount int) error { ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute) defer cancelSimRun() + log.Info("Starting simulation") + result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error { nodeIDs := sim.UpNodeIDs() for _, n := range nodeIDs { @@ -185,6 +192,8 @@ func runFileRetrievalTest(nodeCount int) error { return err } + log.Info("network healthy, start file checks") + // File retrieval check is repeated until all uploaded files are retrieved from all nodes // or until the timeout is reached. REPEAT: @@ -212,6 +221,8 @@ func runFileRetrievalTest(nodeCount int) error { } }) + log.Info("Simulation terminated") + if result.Error != nil { return result.Error } diff --git a/swarm/network/stream/snapshot_sync_test.go b/swarm/network/stream/snapshot_sync_test.go index 330f39712..9737ec0a5 100644 --- a/swarm/network/stream/snapshot_sync_test.go +++ b/swarm/network/stream/snapshot_sync_test.go @@ -94,8 +94,8 @@ func TestSyncingViaGlobalSync(t *testing.T) { //if the `longrunning` flag has been provided //run more test combinations if *longrunning { - chunkCounts = []int{1, 8, 32, 256, 1024} - nodeCounts = []int{16, 32, 64, 128, 256} + chunkCounts = []int{64, 128} + nodeCounts = []int{32, 64} } for _, chunkCount := range chunkCounts { diff --git a/swarm/network/stream/streamer_test.go b/swarm/network/stream/streamer_test.go index 755b74537..56e5e8903 100644 --- a/swarm/network/stream/streamer_test.go +++ b/swarm/network/stream/streamer_test.go @@ -1188,12 +1188,13 @@ func TestGetSubscriptionsRPC(t *testing.T) { // arbitrarily set to 4 nodeCount := 4 + // set the syncUpdateDelay for sync registrations to start + syncUpdateDelay := 200 * time.Millisecond // run with more nodes if `longrunning` flag is set if *longrunning { nodeCount = 64 + syncUpdateDelay = 10 * time.Second } - // set the syncUpdateDelay for sync registrations to start - syncUpdateDelay := 200 * time.Millisecond // holds the msg code for SubscribeMsg var subscribeMsgCode uint64 var ok bool @@ -1241,7 +1242,7 @@ func TestGetSubscriptionsRPC(t *testing.T) { }) defer sim.Close() - ctx, cancelSimRun := context.WithTimeout(context.Background(), 1*time.Minute) + ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute) defer cancelSimRun() // upload a snapshot @@ -1267,6 +1268,9 @@ func TestGetSubscriptionsRPC(t *testing.T) { go func() { //for long running sims, waiting 1 sec will not be enough waitDuration := time.Duration(nodeCount/16) * time.Second + if *longrunning { + waitDuration = syncUpdateDelay + } for { select { case <-ctx.Done(): @@ -1328,11 +1332,11 @@ func TestGetSubscriptionsRPC(t *testing.T) { } } } + log.Debug("All node streams counted", "realCount", realCount) } - // every node is mutually subscribed to each other, so the actual count is half of it emc := expectedMsgCount.count() - if realCount/2 != emc { - return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount/2, emc) + if realCount != emc { + return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount, emc) } return nil }) diff --git a/swarm/network/stream/syncer_test.go b/swarm/network/stream/syncer_test.go index df3008381..07586714e 100644 --- a/swarm/network/stream/syncer_test.go +++ b/swarm/network/stream/syncer_test.go @@ -173,10 +173,6 @@ func testSyncBetweenNodes(t *testing.T, nodes, chunkCount int, skipCheck bool, p } } // here we distribute chunks of a random file into stores 1...nodes - if _, err := sim.WaitTillHealthy(ctx); err != nil { - return err - } - // collect hashes in po 1 bin for each node hashes := make([][]storage.Address, nodes) totalHashes := 0