sectorstorage: Cancel non-running work in case of abort in sched
parent 46a5beafe4
commit 6855284d88
extern/sector-storage/manager.go (vendored): 12 changed lines
@@ -364,10 +364,11 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	wk, wait, err := m.getWork(ctx, sealtasks.TTPreCommit1, sector, ticket, pieces)
+	wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTPreCommit1, sector, ticket, pieces)
 	if err != nil {
 		return nil, xerrors.Errorf("getWork: %w", err)
 	}
+	defer cancel()
 
 	waitRes := func() {
 		p, werr := m.waitWork(ctx, wk)
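Note that the new getWork call reuses the name cancel: the := is legal because wk and wait are new, so the existing cancel (the context cancel func from context.WithCancel above) is reassigned to the work-tracker cancel returned by getWork. The two defer cancel() lines still invoke different functions, because Go saves the deferred function value at the point the defer statement executes. A minimal standalone sketch (not part of this commit) of that behaviour:

package main

import "fmt"

func main() {
	cancel := func() { fmt.Println("context cancel") }
	defer cancel() // the function value is saved here, before any reassignment

	// Reassigning cancel does not change what the first defer will call.
	cancel = func() { fmt.Println("work cancel") }
	defer cancel() // saves the new value

	// Deferred calls run last-in-first-out, so the output is:
	//   work cancel
	//   context cancel
}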
@@ -408,10 +409,11 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	wk, wait, err := m.getWork(ctx, sealtasks.TTPreCommit2, sector, phase1Out)
+	wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTPreCommit2, sector, phase1Out)
 	if err != nil {
 		return storage.SectorCids{}, xerrors.Errorf("getWork: %w", err)
 	}
+	defer cancel()
 
 	waitRes := func() {
 		p, werr := m.waitWork(ctx, wk)
@@ -449,10 +451,11 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	wk, wait, err := m.getWork(ctx, sealtasks.TTCommit1, sector, ticket, seed, pieces, cids)
+	wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTCommit1, sector, ticket, seed, pieces, cids)
 	if err != nil {
 		return storage.Commit1Out{}, xerrors.Errorf("getWork: %w", err)
 	}
+	defer cancel()
 
 	waitRes := func() {
 		p, werr := m.waitWork(ctx, wk)
@@ -490,10 +493,11 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a
 }
 
 func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.Commit1Out) (out storage.Proof, err error) {
-	wk, wait, err := m.getWork(ctx, sealtasks.TTCommit2, sector, phase1Out)
+	wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTCommit2, sector, phase1Out)
 	if err != nil {
 		return storage.Proof{}, xerrors.Errorf("getWork: %w", err)
 	}
+	defer cancel()
 
 	waitRes := func() {
 		p, werr := m.waitWork(ctx, wk)
extern/sector-storage/manager_calltracker.go (vendored): 49 changed lines
@@ -102,10 +102,10 @@ func (m *Manager) setupWorkTracker() {
 }
 
 // returns wait=true when the task is already tracked/running
-func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params ...interface{}) (wid WorkID, wait bool, err error) {
+func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params ...interface{}) (wid WorkID, wait bool, cancel func(), err error) {
 	wid, err = newWorkID(method, params)
 	if err != nil {
-		return WorkID{}, false, xerrors.Errorf("creating WorkID: %w", err)
+		return WorkID{}, false, nil, xerrors.Errorf("creating WorkID: %w", err)
 	}
 
 	m.workLk.Lock()
@@ -113,7 +113,7 @@ func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params
 
 	have, err := m.work.Has(wid)
 	if err != nil {
-		return WorkID{}, false, xerrors.Errorf("failed to check if the task is already tracked: %w", err)
+		return WorkID{}, false, nil, xerrors.Errorf("failed to check if the task is already tracked: %w", err)
 	}
 
 	if !have {
@@ -122,15 +122,52 @@ func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params
 			Status: wsStarted,
 		})
 		if err != nil {
-			return WorkID{}, false, xerrors.Errorf("failed to track task start: %w", err)
+			return WorkID{}, false, nil, xerrors.Errorf("failed to track task start: %w", err)
 		}
 
-		return wid, false, nil
+		return wid, false, func() {
+			m.workLk.Lock()
+			defer m.workLk.Unlock()
+
+			have, err := m.work.Has(wid)
+			if err != nil {
+				log.Errorf("cancel: work has error: %+v", err)
+				return
+			}
+
+			if !have {
+				return // expected / happy path
+			}
+
+			var ws WorkState
+			if err := m.work.Get(wid).Get(&ws); err != nil {
+				log.Errorf("cancel: get work %s: %+v", wid, err)
+				return
+			}
+
+			switch ws.Status {
+			case wsStarted:
+				log.Warnf("canceling started (not running) work %s", wid)
+
+				if err := m.work.Get(wid).End(); err != nil {
+					log.Errorf("cancel: failed to cancel started work %s: %+v", wid, err)
+					return
+				}
+			case wsDone:
+				// TODO: still remove?
+				log.Warnf("cancel called on work %s in 'done' state", wid)
+			case wsRunning:
+				log.Warnf("cancel called on work %s in 'running' state (manager shutting down?)", wid)
+			}
+
+		}, nil
 	}
 
 	// already started
 
-	return wid, true, nil
+	return wid, true, func() {
+		// TODO
+	}, nil
 }
 
 func (m *Manager) startWork(ctx context.Context, wk WorkID) func(callID storiface.CallID, err error) error {
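The cancel closure returned above boils down to: retake the work lock, re-check the tracked entry, and remove it only if it is still in wsStarted (queued but never handed to a worker); running or done work is left to the normal paths. A rough standalone sketch of that pattern, using a hypothetical in-memory tracker in place of the manager's statestore-backed one:

package main

import (
	"fmt"
	"sync"
)

type status int

const (
	started status = iota // tracked, but not yet picked up by a worker
	running               // a worker is executing the task
	done                  // finished, result recorded
)

// tracker is a hypothetical stand-in for the manager's work tracker.
type tracker struct {
	lk   sync.Mutex
	work map[string]status
}

// get registers a work entry (if new) and returns a cancel func that removes
// the entry only when it never reached the running state, mirroring the
// wsStarted branch of the getWork change above.
func (t *tracker) get(id string) (cancel func()) {
	t.lk.Lock()
	defer t.lk.Unlock()
	if _, have := t.work[id]; !have {
		t.work[id] = started
	}

	return func() {
		t.lk.Lock()
		defer t.lk.Unlock()

		st, have := t.work[id]
		if !have {
			return // already cleaned up: the expected / happy path
		}
		switch st {
		case started:
			delete(t.work, id) // never ran, so drop it and allow a retry
		case running, done:
			// running or finished work is handled elsewhere; leave it alone
		}
	}
}

func main() {
	t := &tracker{work: map[string]status{}}

	cancel := t.get("PreCommit1/sector-1")
	cancel() // the caller aborted before the scheduler ran the task

	fmt.Println(len(t.work)) // 0: the stale entry was removed
}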