storage: Track abandoned work more correctly
This commit is contained in:
parent
f819e71d12
commit
5caa277261
@ -190,9 +190,11 @@ var sealingJobsCmd = &cli.Command{
|
||||
switch {
|
||||
case l.RunWait > 0:
|
||||
state = fmt.Sprintf("assigned(%d)", l.RunWait-1)
|
||||
case l.RunWait == -2:
|
||||
case l.RunWait == storiface.RWRetDone:
|
||||
state = "ret-done"
|
||||
case l.RunWait == storiface.RWReturned:
|
||||
state = "returned"
|
||||
case l.RunWait == -1:
|
||||
case l.RunWait == storiface.RWRetWait:
|
||||
state = "ret-wait"
|
||||
}
|
||||
dur := "n/a"
|
||||
|
17
extern/sector-storage/manager_calltracker.go
vendored
17
extern/sector-storage/manager_calltracker.go
vendored
@ -89,8 +89,7 @@ func (m *Manager) setupWorkTracker() {
|
||||
log.Errorf("cleannig up work state for %s", wid)
|
||||
}
|
||||
case wsDone:
|
||||
// realistically this shouldn't ever happen as we return results
|
||||
// immediately after getting them
|
||||
// can happen after restart, abandoning work, and another restart
|
||||
log.Warnf("dropping done work, no result, wid %s", wid)
|
||||
|
||||
if err := m.work.Get(wid).End(); err != nil {
|
||||
@ -393,6 +392,20 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri
|
||||
|
||||
m.results[wid] = res
|
||||
|
||||
err = m.work.Get(wid).Mutate(func(ws *WorkState) error {
|
||||
ws.Status = wsDone
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
// in the unlikely case:
|
||||
// * manager has restarted, and we're still tracking this work, and
|
||||
// * the work is abandoned (storage-fsm doesn't do a matching call on the sector), and
|
||||
// * the call is returned from the worker, and
|
||||
// * this errors
|
||||
// the user will get jobs stuck in ret-wait state
|
||||
log.Errorf("marking work as done: %+v", err)
|
||||
}
|
||||
|
||||
_, found := m.waitRes[wid]
|
||||
if found {
|
||||
close(m.waitRes[wid])
|
||||
|
7
extern/sector-storage/stats.go
vendored
7
extern/sector-storage/stats.go
vendored
@ -72,9 +72,12 @@ func (m *Manager) WorkerJobs() map[uuid.UUID][]storiface.WorkerJob {
|
||||
log.Errorf("WorkerJobs: get work %s: %+v", work, err)
|
||||
}
|
||||
|
||||
wait := -1
|
||||
wait := storiface.RWRetWait
|
||||
if _, ok := m.results[work]; ok {
|
||||
wait = -2 // mark as returned instead of ret-wait
|
||||
wait = storiface.RWReturned
|
||||
}
|
||||
if ws.Status == wsDone {
|
||||
wait = storiface.RWRetDone
|
||||
}
|
||||
|
||||
out[uuid.UUID{}] = append(out[uuid.UUID{}], storiface.WorkerJob{
|
||||
|
13
extern/sector-storage/storiface/worker.go
vendored
13
extern/sector-storage/storiface/worker.go
vendored
@ -41,12 +41,23 @@ type WorkerStats struct {
|
||||
CpuUse uint64 // nolint
|
||||
}
|
||||
|
||||
// Negative sentinel values for WorkerJob.RunWait. Non-negative RunWait values
// mean the job is running (0) or assigned (1+); the values below label jobs
// in one of the "ret-*"/"returned" display states.
const (
|
||||
RWRetWait = -1 // displayed as "ret-wait"
|
||||
RWReturned = -2 // displayed as "returned"
|
||||
RWRetDone = -3 // displayed as "ret-done"
|
||||
)
|
||||
|
||||
type WorkerJob struct {
|
||||
ID CallID
|
||||
Sector abi.SectorID
|
||||
Task sealtasks.TaskType
|
||||
|
||||
RunWait int // -2 - returned, -1 - ret-wait, 0 - running, 1+ - assigned
|
||||
// 1+ - assigned
|
||||
// 0 - running
|
||||
// -1 - ret-wait
|
||||
// -2 - returned
|
||||
// -3 - ret-done
|
||||
RunWait int
|
||||
Start time.Time
|
||||
|
||||
Hostname string `json:",omitempty"` // optional, set for ret-wait jobs
|
||||
|
Loading…
Reference in New Issue
Block a user