storage: Track abandoned work more correctly
This commit is contained in:
parent
f819e71d12
commit
5caa277261
@ -190,9 +190,11 @@ var sealingJobsCmd = &cli.Command{
|
|||||||
switch {
|
switch {
|
||||||
case l.RunWait > 0:
|
case l.RunWait > 0:
|
||||||
state = fmt.Sprintf("assigned(%d)", l.RunWait-1)
|
state = fmt.Sprintf("assigned(%d)", l.RunWait-1)
|
||||||
case l.RunWait == -2:
|
case l.RunWait == storiface.RWRetDone:
|
||||||
|
state = "ret-done"
|
||||||
|
case l.RunWait == storiface.RWReturned:
|
||||||
state = "returned"
|
state = "returned"
|
||||||
case l.RunWait == -1:
|
case l.RunWait == storiface.RWRetWait:
|
||||||
state = "ret-wait"
|
state = "ret-wait"
|
||||||
}
|
}
|
||||||
dur := "n/a"
|
dur := "n/a"
|
||||||
|
17
extern/sector-storage/manager_calltracker.go
vendored
17
extern/sector-storage/manager_calltracker.go
vendored
@ -89,8 +89,7 @@ func (m *Manager) setupWorkTracker() {
|
|||||||
log.Errorf("cleannig up work state for %s", wid)
|
log.Errorf("cleannig up work state for %s", wid)
|
||||||
}
|
}
|
||||||
case wsDone:
|
case wsDone:
|
||||||
// realistically this shouldn't ever happen as we return results
|
// can happen after restart, abandoning work, and another restart
|
||||||
// immediately after getting them
|
|
||||||
log.Warnf("dropping done work, no result, wid %s", wid)
|
log.Warnf("dropping done work, no result, wid %s", wid)
|
||||||
|
|
||||||
if err := m.work.Get(wid).End(); err != nil {
|
if err := m.work.Get(wid).End(); err != nil {
|
||||||
@ -393,6 +392,20 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri
|
|||||||
|
|
||||||
m.results[wid] = res
|
m.results[wid] = res
|
||||||
|
|
||||||
|
err = m.work.Get(wid).Mutate(func(ws *WorkState) error {
|
||||||
|
ws.Status = wsDone
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
// in the unlikely case:
|
||||||
|
// * manager has restarted, and we're still tracking this work, and
|
||||||
|
// * the work is abandoned (storage-fsm doesn't do a matching call on the sector), and
|
||||||
|
// * the call is returned from the worker, and
|
||||||
|
// * this errors
|
||||||
|
// the user will get jobs stuck in ret-wait state
|
||||||
|
log.Errorf("marking work as done: %+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
_, found := m.waitRes[wid]
|
_, found := m.waitRes[wid]
|
||||||
if found {
|
if found {
|
||||||
close(m.waitRes[wid])
|
close(m.waitRes[wid])
|
||||||
|
7
extern/sector-storage/stats.go
vendored
7
extern/sector-storage/stats.go
vendored
@ -72,9 +72,12 @@ func (m *Manager) WorkerJobs() map[uuid.UUID][]storiface.WorkerJob {
|
|||||||
log.Errorf("WorkerJobs: get work %s: %+v", work, err)
|
log.Errorf("WorkerJobs: get work %s: %+v", work, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
wait := -1
|
wait := storiface.RWRetWait
|
||||||
if _, ok := m.results[work]; ok {
|
if _, ok := m.results[work]; ok {
|
||||||
wait = -2 // mark as returned instead of ret-wait
|
wait = storiface.RWReturned
|
||||||
|
}
|
||||||
|
if ws.Status == wsDone {
|
||||||
|
wait = storiface.RWRetDone
|
||||||
}
|
}
|
||||||
|
|
||||||
out[uuid.UUID{}] = append(out[uuid.UUID{}], storiface.WorkerJob{
|
out[uuid.UUID{}] = append(out[uuid.UUID{}], storiface.WorkerJob{
|
||||||
|
13
extern/sector-storage/storiface/worker.go
vendored
13
extern/sector-storage/storiface/worker.go
vendored
@ -41,12 +41,23 @@ type WorkerStats struct {
|
|||||||
CpuUse uint64 // nolint
|
CpuUse uint64 // nolint
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const ( // Negative sentinel values for WorkerJob.RunWait (0 means running, 1+ means assigned).
|
||||||
|
RWRetWait = -1 // ret-wait: the value Manager.WorkerJobs starts from for tracked work
|
||||||
|
RWReturned = -2 // returned: Manager.WorkerJobs found a result stored for the work
|
||||||
|
RWRetDone = -3 // ret-done: the tracked work state has Status == wsDone
|
||||||
|
)
|
||||||
|
|
||||||
type WorkerJob struct {
|
type WorkerJob struct {
|
||||||
ID CallID
|
ID CallID
|
||||||
Sector abi.SectorID
|
Sector abi.SectorID
|
||||||
Task sealtasks.TaskType
|
Task sealtasks.TaskType
|
||||||
|
|
||||||
RunWait int // -2 - returned, -1 - ret-wait, 0 - running, 1+ - assigned
|
// 1+ - assigned
|
||||||
|
// 0 - running
|
||||||
|
// -1 - ret-wait
|
||||||
|
// -2 - returned
|
||||||
|
// -3 - ret-done
|
||||||
|
RunWait int
|
||||||
Start time.Time
|
Start time.Time
|
||||||
|
|
||||||
Hostname string `json:",omitempty"` // optional, set for ret-wait jobs
|
Hostname string `json:",omitempty"` // optional, set for ret-wait jobs
|
||||||
|
Loading…
Reference in New Issue
Block a user