Merge pull request #4778 from filecoin-project/fix/miner-fixes
Fix scheduler lockups after storage is freed
This commit is contained in:
commit
61fa94617d
19
extern/sector-storage/sched_worker.go
vendored
19
extern/sector-storage/sched_worker.go
vendored
@ -139,10 +139,17 @@ func (sw *schedWorker) handleWorker() {
|
||||
|
||||
// wait for more tasks to be assigned by the main scheduler or for the worker
|
||||
// to finish processing a task
|
||||
update, ok := sw.waitForUpdates()
|
||||
update, pokeSched, ok := sw.waitForUpdates()
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if pokeSched {
|
||||
// a task has finished preparing, which can mean that we've freed some space on some worker
|
||||
select {
|
||||
case sched.workerChange <- struct{}{}:
|
||||
default: // workerChange is buffered, and scheduling is global, so it's ok if we don't send here
|
||||
}
|
||||
}
|
||||
if update {
|
||||
break
|
||||
}
|
||||
@ -257,23 +264,23 @@ func (sw *schedWorker) requestWindows() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (sw *schedWorker) waitForUpdates() (update bool, ok bool) {
|
||||
func (sw *schedWorker) waitForUpdates() (update bool, sched bool, ok bool) {
|
||||
select {
|
||||
case <-sw.heartbeatTimer.C:
|
||||
return false, true
|
||||
return false, false, true
|
||||
case w := <-sw.scheduledWindows:
|
||||
sw.worker.wndLk.Lock()
|
||||
sw.worker.activeWindows = append(sw.worker.activeWindows, w)
|
||||
sw.worker.wndLk.Unlock()
|
||||
return true, true
|
||||
return true, false, true
|
||||
case <-sw.taskDone:
|
||||
log.Debugw("task done", "workerid", sw.wid)
|
||||
return true, true
|
||||
return true, true, true
|
||||
case <-sw.sched.closing:
|
||||
case <-sw.worker.closingMgr:
|
||||
}
|
||||
|
||||
return false, false
|
||||
return false, false, false
|
||||
}
|
||||
|
||||
func (sw *schedWorker) workerCompactWindows() {
|
||||
|
10
extern/sector-storage/stores/local.go
vendored
10
extern/sector-storage/stores/local.go
vendored
@ -298,6 +298,11 @@ func (st *Local) reportHealth(ctx context.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
st.reportStorage(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
func (st *Local) reportStorage(ctx context.Context) {
|
||||
st.localLk.RLock()
|
||||
|
||||
toReport := map[ID]HealthReport{}
|
||||
@ -317,7 +322,6 @@ func (st *Local) reportHealth(ctx context.Context) {
|
||||
log.Warnf("error reporting storage health for %s (%+v): %+v", id, report, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (st *Local) Reserve(ctx context.Context, sid abi.SectorID, ssize abi.SectorSize, ft storiface.SectorFileType, storageIDs storiface.SectorPaths, overheadTab map[storiface.SectorFileType]int) (func(), error) {
|
||||
@ -568,6 +572,8 @@ func (st *Local) removeSector(ctx context.Context, sid abi.SectorID, typ storifa
|
||||
log.Errorf("removing sector (%v) from %s: %+v", sid, spath, err)
|
||||
}
|
||||
|
||||
st.reportStorage(ctx) // report freed space
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -623,6 +629,8 @@ func (st *Local) MoveStorage(ctx context.Context, s abi.SectorID, ssize abi.Sect
|
||||
}
|
||||
}
|
||||
|
||||
st.reportStorage(ctx) // report space use changes
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user