Merge pull request #4804 from filecoin-project/feat/storage-retwait-cleanup

Expand sched-diag; Command to abort sealing calls
This commit is contained in:
Aayush Rajasekaran 2020-11-12 02:11:15 -05:00 committed by GitHub
commit 3a3986320c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 130 additions and 11 deletions

View File

@ -71,6 +71,7 @@ type StorageMiner interface {
// SealingSchedDiag dumps internal sealing scheduler state
SealingSchedDiag(ctx context.Context, doSched bool) (interface{}, error)
SealingAbort(ctx context.Context, call storiface.CallID) error
stores.SectorIndex

View File

@ -323,7 +323,8 @@ type StorageMinerStruct struct {
ReturnReadPiece func(ctx context.Context, callID storiface.CallID, ok bool, err string) error `perm:"admin" retry:"true"`
ReturnFetch func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" retry:"true"`
SealingSchedDiag func(context.Context, bool) (interface{}, error) `perm:"admin"`
SealingSchedDiag func(context.Context, bool) (interface{}, error) `perm:"admin"`
SealingAbort func(ctx context.Context, call storiface.CallID) error `perm:"admin"`
StorageList func(context.Context) (map[stores.ID][]stores.Decl, error) `perm:"admin"`
StorageLocal func(context.Context) (map[stores.ID]string, error) `perm:"admin"`
@ -1318,6 +1319,10 @@ func (c *StorageMinerStruct) SealingSchedDiag(ctx context.Context, doSched bool)
return c.Internal.SealingSchedDiag(ctx, doSched)
}
// SealingAbort forwards the abort request for the given call to the
// Internal.SealingAbort function and returns its error unchanged.
func (c *StorageMinerStruct) SealingAbort(ctx context.Context, call storiface.CallID) error {
return c.Internal.SealingAbort(ctx, call)
}
// StorageAttach forwards the storage info and filesystem stat to the
// Internal.StorageAttach function and returns its error unchanged.
func (c *StorageMinerStruct) StorageAttach(ctx context.Context, si stores.StorageInfo, st fsutil.FsStat) error {
return c.Internal.StorageAttach(ctx, si, st)
}

View File

@ -28,6 +28,7 @@ var sealingCmd = &cli.Command{
sealingJobsCmd,
sealingWorkersCmd,
sealingSchedDiagCmd,
sealingAbortCmd,
},
}
@ -124,9 +125,13 @@ var sealingWorkersCmd = &cli.Command{
var sealingJobsCmd = &cli.Command{
Name: "jobs",
Usage: "list workers",
Usage: "list running jobs",
Flags: []cli.Flag{
&cli.BoolFlag{Name: "color"},
&cli.BoolFlag{
Name: "show-ret-done",
Usage: "show returned but not consumed calls",
},
},
Action: func(cctx *cli.Context) error {
color.NoColor = !cctx.Bool("color")
@ -191,6 +196,9 @@ var sealingJobsCmd = &cli.Command{
case l.RunWait > 0:
state = fmt.Sprintf("assigned(%d)", l.RunWait-1)
case l.RunWait == storiface.RWRetDone:
if !cctx.Bool("show-ret-done") {
continue
}
state = "ret-done"
case l.RunWait == storiface.RWReturned:
state = "returned"
@ -208,9 +216,9 @@ var sealingJobsCmd = &cli.Command{
}
_, _ = fmt.Fprintf(tw, "%s\t%d\t%s\t%s\t%s\t%s\t%s\n",
hex.EncodeToString(l.ID.ID[10:]),
hex.EncodeToString(l.ID.ID[:4]),
l.Sector.Number,
hex.EncodeToString(l.wid[5:]),
hex.EncodeToString(l.wid[:4]),
hostname,
l.Task.Short(),
state,
@ -253,3 +261,47 @@ var sealingSchedDiagCmd = &cli.Command{
return nil
},
}
// sealingAbortCmd aborts a single sealing call. The sole argument is a
// prefix of the call's UUID string form; it is matched against the jobs
// reported by WorkerJobs, and the matching call is passed to SealingAbort.
//
// The command fails when the prefix is empty, matches no job, or matches
// jobs with more than one distinct call ID.
var sealingAbortCmd = &cli.Command{
	Name:      "abort",
	Usage:     "Abort a running job",
	ArgsUsage: "[callid]",
	Action: func(cctx *cli.Context) error {
		if cctx.Args().Len() != 1 {
			return xerrors.Errorf("expected 1 argument")
		}

		// strings.HasPrefix(s, "") is true for every s, so an empty argument
		// would select an arbitrary job; refuse it up front.
		prefix := cctx.Args().First()
		if prefix == "" {
			return xerrors.Errorf("job id prefix must not be empty")
		}

		nodeApi, closer, err := lcli.GetStorageMinerAPI(cctx)
		if err != nil {
			return err
		}
		defer closer()

		ctx := lcli.ReqContext(cctx)

		jobs, err := nodeApi.WorkerJobs(ctx)
		if err != nil {
			return xerrors.Errorf("getting worker jobs: %w", err)
		}

		// Scan every job instead of stopping at the first hit: map iteration
		// order is unspecified, so with an ambiguous prefix "first match"
		// would abort a different job from run to run.
		var job *storiface.WorkerJob
		for _, workerJobs := range jobs {
			for _, j := range workerJobs {
				if !strings.HasPrefix(j.ID.ID.String(), prefix) {
					continue
				}
				if job == nil {
					j := j
					job = &j
					continue
				}
				if job.ID.String() != j.ID.String() {
					return xerrors.Errorf("job id prefix %q is ambiguous: matches %s and %s", prefix, job.ID.String(), j.ID.String())
				}
			}
		}

		if job == nil {
			return xerrors.Errorf("job with specified id prefix not found")
		}

		fmt.Printf("aborting job %s, task %s, sector %d, running on host %s\n", job.ID.String(), job.Task.Short(), job.Sector.Number, job.Hostname)

		return nodeApi.SealingAbort(ctx, job.ID)
	},
}

View File

@ -220,7 +220,9 @@ func (m *Manager) readPiece(sink io.Writer, sector abi.SectorID, offset storifac
if err != nil {
return err
}
*rok = r.(bool)
if r != nil {
*rok = r.(bool)
}
return nil
}
}
@ -342,7 +344,9 @@ func (m *Manager) AddPiece(ctx context.Context, sector abi.SectorID, existingPie
if err != nil {
return err
}
out = p.(abi.PieceInfo)
if p != nil {
out = p.(abi.PieceInfo)
}
return nil
})
@ -366,7 +370,9 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke
waitErr = werr
return
}
out = p.(storage.PreCommit1Out)
if p != nil {
out = p.(storage.PreCommit1Out)
}
}
if wait { // already in progress
@ -415,7 +421,9 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase
waitErr = werr
return
}
out = p.(storage.SectorCids)
if p != nil {
out = p.(storage.SectorCids)
}
}
if wait { // already in progress
@ -462,7 +470,9 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a
waitErr = werr
return
}
out = p.(storage.Commit1Out)
if p != nil {
out = p.(storage.Commit1Out)
}
}
if wait { // already in progress
@ -509,7 +519,9 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou
waitErr = werr
return
}
out = p.(storage.Proof)
if p != nil {
out = p.(storage.Proof)
}
}
if wait { // already in progress
@ -688,7 +700,48 @@ func (m *Manager) SchedDiag(ctx context.Context, doSched bool) (interface{}, err
}
}
return m.sched.Info(ctx)
si, err := m.sched.Info(ctx)
if err != nil {
return nil, err
}
type SchedInfo interface{}
i := struct {
SchedInfo
ReturnedWork []string
Waiting []string
CallToWork map[string]string
EarlyRet []string
}{
SchedInfo: si,
CallToWork: map[string]string{},
}
m.workLk.Lock()
for w := range m.results {
i.ReturnedWork = append(i.ReturnedWork, w.String())
}
for id := range m.callRes {
i.EarlyRet = append(i.EarlyRet, id.String())
}
for w := range m.waitRes {
i.Waiting = append(i.Waiting, w.String())
}
for c, w := range m.callToWork {
i.CallToWork[c.String()] = w.String()
}
m.workLk.Unlock()
return i, nil
}
func (m *Manager) Close(ctx context.Context) error {

View File

@ -414,3 +414,7 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri
return nil
}
// Abort reports the given call as finished by passing a nil result and the
// error string "task aborted" to returnResult. ctx is not used here.
func (m *Manager) Abort(ctx context.Context, call storiface.CallID) error {
return m.returnResult(call, nil, "task aborted")
}

View File

@ -300,6 +300,10 @@ func (sm *StorageMinerAPI) SealingSchedDiag(ctx context.Context, doSched bool) (
return sm.StorageMgr.SchedDiag(ctx, doSched)
}
// SealingAbort hands the abort request for the given call to
// sm.StorageMgr.Abort and returns its error unchanged.
func (sm *StorageMinerAPI) SealingAbort(ctx context.Context, call storiface.CallID) error {
return sm.StorageMgr.Abort(ctx, call)
}
func (sm *StorageMinerAPI) MarketImportDealData(ctx context.Context, propCid cid.Cid, path string) error {
fi, err := os.Open(path)
if err != nil {