p2p: ensure the Server.run loop keeps ticking even if discovery hangs (#20573)

This is a temporary fix for a problem that started happening when the
dialer was changed to read nodes from an enode.Iterator. Before the
iterator change, discovery queries would always return within a couple
of seconds, even if there was no Internet access. Since the iterator
doesn't return until a node is actually found, discoverTask can now take
much longer. This means that the 'emergency connect' logic might not
execute in time, leading to a stuck node.
This commit is contained in:
Felix Lange 2020-01-17 11:29:16 +01:00 committed by Péter Szilágyi
parent fcafa0baa5
commit d5acc5ed9e

View File

@@ -650,9 +650,12 @@ func (srv *Server) run(dialstate dialer) {
inboundCount = 0 inboundCount = 0
trusted = make(map[enode.ID]bool, len(srv.TrustedNodes)) trusted = make(map[enode.ID]bool, len(srv.TrustedNodes))
taskdone = make(chan task, maxActiveDialTasks) taskdone = make(chan task, maxActiveDialTasks)
tick = time.NewTicker(30 * time.Second)
runningTasks []task runningTasks []task
queuedTasks []task // tasks that can't run yet queuedTasks []task // tasks that can't run yet
) )
defer tick.Stop()
// Put trusted nodes into a map to speed up checks. // Put trusted nodes into a map to speed up checks.
// Trusted peers are loaded on startup or added via AddTrustedPeer RPC. // Trusted peers are loaded on startup or added via AddTrustedPeer RPC.
for _, n := range srv.TrustedNodes { for _, n := range srv.TrustedNodes {
@@ -694,6 +697,9 @@ running:
scheduleTasks() scheduleTasks()
select { select {
case <-tick.C:
// This is just here to ensure the dial scheduler runs occasionally.
case <-srv.quit: case <-srv.quit:
// The server was stopped. Run the cleanup logic. // The server was stopped. Run the cleanup logic.
break running break running