Merge pull request #3489 from filecoin-project/fix/sched-deadlocks

sealing sched: Fix deadlock between worker.wndLk / workersLk
This commit is contained in:
Łukasz Magiera 2020-09-02 18:03:18 +02:00 committed by GitHub
commit 5f79ff340d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -563,6 +563,7 @@ func (sh *scheduler) runWorker(wid WorkerID) {
return
}
sh.workersLk.RLock()
worker.wndLk.Lock()
windowsRequested -= sh.workerCompactWindows(worker, wid)
@ -574,8 +575,6 @@ func (sh *scheduler) runWorker(wid WorkerID) {
// process tasks within a window, preferring tasks at lower indexes
for len(firstWindow.todo) > 0 {
sh.workersLk.RLock()
tidx := -1
worker.lk.Lock()
@ -589,7 +588,6 @@ func (sh *scheduler) runWorker(wid WorkerID) {
worker.lk.Unlock()
if tidx == -1 {
sh.workersLk.RUnlock()
break assignLoop
}
@ -597,7 +595,6 @@ func (sh *scheduler) runWorker(wid WorkerID) {
log.Debugf("assign worker sector %d", todo.sector.Number)
err := sh.assignWorker(taskDone, wid, worker, todo)
sh.workersLk.RUnlock()
if err != nil {
log.Error("assignWorker error: %+v", err)
@ -618,6 +615,7 @@ func (sh *scheduler) runWorker(wid WorkerID) {
}
worker.wndLk.Unlock()
sh.workersLk.RUnlock()
}
}()
}
@ -776,14 +774,19 @@ func (sh *scheduler) dropWorker(wid WorkerID) {
}
func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) {
if !w.cleanupStarted {
select {
case <-w.closingMgr:
default:
close(w.closingMgr)
}
sh.workersLk.Unlock()
select {
case <-w.closedMgr:
case <-time.After(time.Second):
log.Errorf("timeout closing worker manager goroutine %d", wid)
}
sh.workersLk.Lock()
if !w.cleanupStarted {
w.cleanupStarted = true