Merge pull request #4778 from filecoin-project/fix/miner-fixes
Fix scheduler lockups after storage is freed
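The fix has two cooperating halves, shown in the diffs below: stores/local.go reports storage stats to the index immediately after space is freed or moved (instead of waiting for the next periodic health report), and sched_worker.go pokes the global scheduler when a task finishes preparing, so task assignment is re-run against the newly freed resources.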
commit 61fa94617d
extern/sector-storage/sched_worker.go (vendored), 19 lines changed
@@ -139,10 +139,17 @@ func (sw *schedWorker) handleWorker() {
 		// wait for more tasks to be assigned by the main scheduler or for the worker
 		// to finish processing a task
-		update, ok := sw.waitForUpdates()
+		update, pokeSched, ok := sw.waitForUpdates()
 		if !ok {
 			return
 		}
 
+		if pokeSched {
+			// a task has finished preparing, which can mean that we've freed some space on some worker
+			select {
+			case sched.workerChange <- struct{}{}:
+			default: // workerChange is buffered, and scheduling is global, so it's ok if we don't send here
+			}
+		}
 		if update {
 			break
 		}
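Why the non-blocking send matters: when a task finishes its prepare stage, resources may have been freed, and the global scheduler should re-run assignment, but a worker loop must never block (or deadlock) just to deliver that wakeup. A minimal, runnable sketch of this poke pattern follows; the channel name mirrors the diff, while the one-slot buffer size and the surrounding loop are illustrative assumptions, not lotus API:

package main

import (
	"fmt"
	"time"
)

func main() {
	// one-slot buffer: a single pending wakeup is remembered;
	// further pokes while it is pending are safely dropped
	workerChange := make(chan struct{}, 1)

	// hypothetical global scheduler: re-runs assignment on each wakeup
	go func() {
		for range workerChange {
			fmt.Println("scheduler: re-evaluating assignments")
		}
	}()

	// several workers poking concurrently; none of them can block here
	for i := 0; i < 3; i++ {
		select {
		case workerChange <- struct{}{}:
		default: // a wakeup is already queued; scheduling is global, so dropping is fine
		}
	}

	time.Sleep(50 * time.Millisecond) // give the sketch's scheduler time to run
}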
@@ -257,23 +264,23 @@ func (sw *schedWorker) requestWindows() bool {
 	return true
 }
 
-func (sw *schedWorker) waitForUpdates() (update bool, ok bool) {
+func (sw *schedWorker) waitForUpdates() (update bool, sched bool, ok bool) {
 	select {
 	case <-sw.heartbeatTimer.C:
-		return false, true
+		return false, false, true
 	case w := <-sw.scheduledWindows:
 		sw.worker.wndLk.Lock()
 		sw.worker.activeWindows = append(sw.worker.activeWindows, w)
 		sw.worker.wndLk.Unlock()
-		return true, true
+		return true, false, true
 	case <-sw.taskDone:
 		log.Debugw("task done", "workerid", sw.wid)
-		return true, true
+		return true, true, true
 	case <-sw.sched.closing:
 	case <-sw.worker.closingMgr:
 	}
 
-	return false, false
+	return false, false, false
 }
 
 func (sw *schedWorker) workerCompactWindows() {
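The signature change threads a second reason out of the select: update still means "this worker should re-check its own windows", while the new middle value (sched in the signature, pokeSched at the call site) means "a task completed, so globally freed resources may unblock other workers". Only the taskDone case sets both. A self-contained sketch of this "select with reason flags" shape, using illustrative channels rather than the real scheduler's:

package main

import (
	"fmt"
	"time"
)

// waitForEvent blocks until something happens and reports, via flags,
// what follow-up the caller owes: update local state, poke the global
// scheduler, or (ok == false) shut down.
func waitForEvent(heartbeat <-chan time.Time, taskDone, closing <-chan struct{}) (update, poke, ok bool) {
	select {
	case <-heartbeat: // liveness tick only; nothing to do
		return false, false, true
	case <-taskDone: // local windows changed AND resources were freed
		return true, true, true
	case <-closing: // shutting down
	}
	return false, false, false
}

func main() {
	heartbeat := time.NewTicker(time.Second)
	defer heartbeat.Stop()

	taskDone := make(chan struct{}, 1)
	taskDone <- struct{}{} // simulate a completed task

	update, poke, ok := waitForEvent(heartbeat.C, taskDone, make(chan struct{}))
	fmt.Println(update, poke, ok) // true true true
}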
extern/sector-storage/stores/local.go (vendored), 34 lines changed
@@ -298,24 +298,28 @@ func (st *Local) reportHealth(ctx context.Context) {
 			return
 		}
 
-		st.localLk.RLock()
+		st.reportStorage(ctx)
+	}
+}
 
-		toReport := map[ID]HealthReport{}
-		for id, p := range st.paths {
-			stat, err := p.stat(st.localStorage)
+func (st *Local) reportStorage(ctx context.Context) {
+	st.localLk.RLock()
 
-			toReport[id] = HealthReport{
-				Stat: stat,
-				Err:  err,
-			}
-		}
+	toReport := map[ID]HealthReport{}
+	for id, p := range st.paths {
+		stat, err := p.stat(st.localStorage)
 
-		st.localLk.RUnlock()
+		toReport[id] = HealthReport{
+			Stat: stat,
+			Err:  err,
+		}
+	}
 
-		for id, report := range toReport {
-			if err := st.index.StorageReportHealth(ctx, id, report); err != nil {
-				log.Warnf("error reporting storage health for %s (%+v): %+v", id, report, err)
-			}
-		}
-	}
-}
+	st.localLk.RUnlock()
+
+	for id, report := range toReport {
+		if err := st.index.StorageReportHealth(ctx, id, report); err != nil {
+			log.Warnf("error reporting storage health for %s (%+v): %+v", id, report, err)
+		}
+	}
+}
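The refactor changes no behavior inside the loop; it extracts the body into reportStorage so call sites that change disk usage can trigger a report on demand. Note the lock discipline it preserves: stats are gathered under the read lock into a local map, the lock is released, and only then is the (potentially slow) index called. A condensed sketch of that shape, with simplified stand-in types (the real HealthReport and StorageReportHealth are richer):

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

type Local struct {
	lk    sync.RWMutex
	paths map[string]int64 // path ID -> free bytes (stand-in for real stats)
}

func (st *Local) reportStorage(ctx context.Context) {
	// snapshot under the read lock...
	st.lk.RLock()
	toReport := make(map[string]int64, len(st.paths))
	for id, free := range st.paths {
		toReport[id] = free
	}
	st.lk.RUnlock()

	// ...then report outside it, so a slow index call can't
	// stall writers that want to update the path set
	for id, free := range toReport {
		fmt.Printf("index: path %s reports %d bytes free\n", id, free)
	}
}

// reportHealth keeps the periodic cadence; code paths that free space
// can now also call reportStorage directly instead of waiting for the ticker.
func (st *Local) reportHealth(ctx context.Context) {
	t := time.NewTicker(time.Second) // illustrative interval
	defer t.Stop()
	for {
		select {
		case <-t.C:
		case <-ctx.Done():
			return
		}
		st.reportStorage(ctx)
	}
}

func main() {
	st := &Local{paths: map[string]int64{"path-a": 1 << 30}}
	st.reportStorage(context.Background())
}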
@@ -568,6 +572,8 @@ func (st *Local) removeSector(ctx context.Context, sid abi.SectorID, typ storifa
 		log.Errorf("removing sector (%v) from %s: %+v", sid, spath, err)
 	}
 
+	st.reportStorage(ctx) // report freed space
+
 	return nil
 }
@@ -623,6 +629,8 @@ func (st *Local) MoveStorage(ctx context.Context, s abi.SectorID, ssize abi.Sect
 		}
 	}
 
+	st.reportStorage(ctx) // report space use changes
+
 	return nil
 }
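These two one-line calls are the user-visible half of the fix: previously, space freed by removeSector or shifted by MoveStorage was only picked up by the index at the next periodic health report, so a scheduler waiting for free storage could sit idle (the lockup in the PR title) until that heartbeat fired. Reporting immediately, combined with the workerChange poke above, lets stalled scheduling retry as soon as space actually becomes available.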