2020-03-23 12:29:24 +00:00
|
|
|
package main
|
2020-03-23 14:56:22 +00:00
|
|
|
|
|
|
|
import (
|
2020-09-23 17:26:26 +00:00
|
|
|
"encoding/hex"
|
2020-07-27 11:23:43 +00:00
|
|
|
"encoding/json"
|
2020-03-23 14:56:22 +00:00
|
|
|
"fmt"
|
2021-09-09 21:41:59 +00:00
|
|
|
"math"
|
2020-07-21 18:07:49 +00:00
|
|
|
"os"
|
2020-03-23 14:56:22 +00:00
|
|
|
"sort"
|
2020-05-01 12:06:19 +00:00
|
|
|
"strings"
|
2020-07-21 18:07:49 +00:00
|
|
|
"text/tabwriter"
|
|
|
|
"time"
|
2020-05-01 12:06:19 +00:00
|
|
|
|
|
|
|
"github.com/fatih/color"
|
2020-10-18 10:35:44 +00:00
|
|
|
"github.com/google/uuid"
|
2020-06-02 18:12:53 +00:00
|
|
|
"github.com/urfave/cli/v2"
|
2020-09-23 17:26:26 +00:00
|
|
|
"golang.org/x/xerrors"
|
2020-03-23 14:56:22 +00:00
|
|
|
|
2020-08-17 13:26:18 +00:00
|
|
|
"github.com/filecoin-project/lotus/extern/sector-storage/storiface"
|
2020-05-01 12:06:19 +00:00
|
|
|
|
|
|
|
"github.com/filecoin-project/lotus/chain/types"
|
2020-03-23 14:56:22 +00:00
|
|
|
lcli "github.com/filecoin-project/lotus/cli"
|
|
|
|
)
|
|
|
|
|
2020-07-21 18:07:49 +00:00
|
|
|
var sealingCmd = &cli.Command{
|
|
|
|
Name: "sealing",
|
|
|
|
Usage: "interact with sealing pipeline",
|
2020-03-23 14:56:22 +00:00
|
|
|
Subcommands: []*cli.Command{
|
2020-07-21 18:07:49 +00:00
|
|
|
sealingJobsCmd,
|
|
|
|
sealingWorkersCmd,
|
2020-07-27 11:23:43 +00:00
|
|
|
sealingSchedDiagCmd,
|
2020-11-11 16:39:12 +00:00
|
|
|
sealingAbortCmd,
|
2020-03-23 14:56:22 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2021-09-09 21:41:59 +00:00
|
|
|
// barCols is the width, in character cells, of the usage bars rendered by
// barString. It is a float64 because it is used directly in ratio arithmetic.
var barCols float64 = 64
|
|
|
|
|
|
|
|
func barString(total, y, g float64) string {
|
|
|
|
yBars := int(math.Round(y / total * barCols))
|
|
|
|
gBars := int(math.Round(g / total * barCols))
|
|
|
|
eBars := int(barCols) - yBars - gBars
|
2022-02-28 08:51:15 +00:00
|
|
|
var barString = color.YellowString(strings.Repeat("|", yBars)) +
|
|
|
|
color.GreenString(strings.Repeat("|", gBars))
|
|
|
|
if eBars >= 0 {
|
|
|
|
barString += strings.Repeat(" ", eBars)
|
|
|
|
}
|
|
|
|
return barString
|
2021-09-09 21:41:59 +00:00
|
|
|
}
|
|
|
|
|
2020-07-21 18:07:49 +00:00
|
|
|
var sealingWorkersCmd = &cli.Command{
|
|
|
|
Name: "workers",
|
2020-03-23 14:56:22 +00:00
|
|
|
Usage: "list workers",
|
2020-05-01 12:06:19 +00:00
|
|
|
Flags: []cli.Flag{
|
2021-07-07 16:12:17 +00:00
|
|
|
&cli.BoolFlag{
|
2021-07-08 08:44:13 +00:00
|
|
|
Name: "color",
|
2021-07-13 10:19:55 +00:00
|
|
|
Usage: "use color in display output",
|
2021-07-08 08:44:13 +00:00
|
|
|
DefaultText: "depends on output being a TTY",
|
2021-07-07 16:12:17 +00:00
|
|
|
},
|
2020-05-01 12:06:19 +00:00
|
|
|
},
|
2020-03-23 14:56:22 +00:00
|
|
|
Action: func(cctx *cli.Context) error {
|
2021-07-13 10:19:55 +00:00
|
|
|
if cctx.IsSet("color") {
|
|
|
|
color.NoColor = !cctx.Bool("color")
|
|
|
|
}
|
2020-05-01 12:06:19 +00:00
|
|
|
|
2020-03-23 14:56:22 +00:00
|
|
|
nodeApi, closer, err := lcli.GetStorageMinerAPI(cctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer closer()
|
|
|
|
|
|
|
|
ctx := lcli.ReqContext(cctx)
|
|
|
|
|
|
|
|
stats, err := nodeApi.WorkerStats(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-03-24 23:49:45 +00:00
|
|
|
type sortableStat struct {
|
2020-10-18 10:35:44 +00:00
|
|
|
id uuid.UUID
|
2020-04-23 22:23:20 +00:00
|
|
|
storiface.WorkerStats
|
2020-03-24 23:49:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
st := make([]sortableStat, 0, len(stats))
|
2020-03-23 14:56:22 +00:00
|
|
|
for id, stat := range stats {
|
2020-03-24 23:49:45 +00:00
|
|
|
st = append(st, sortableStat{id, stat})
|
2020-03-23 14:56:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
sort.Slice(st, func(i, j int) bool {
|
2020-10-18 10:35:44 +00:00
|
|
|
return st[i].id.String() < st[j].id.String()
|
2020-03-23 14:56:22 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
for _, stat := range st {
|
|
|
|
gpuUse := "not "
|
2020-05-01 12:06:19 +00:00
|
|
|
gpuCol := color.FgBlue
|
2021-09-01 01:59:25 +00:00
|
|
|
if stat.GpuUsed > 0 {
|
2020-05-01 12:06:19 +00:00
|
|
|
gpuCol = color.FgGreen
|
2020-03-23 14:56:22 +00:00
|
|
|
gpuUse = ""
|
|
|
|
}
|
|
|
|
|
2020-10-18 11:03:17 +00:00
|
|
|
var disabled string
|
|
|
|
if !stat.Enabled {
|
|
|
|
disabled = color.RedString(" (disabled)")
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Printf("Worker %s, host %s%s\n", stat.id, color.MagentaString(stat.Info.Hostname), disabled)
|
2020-05-01 12:06:19 +00:00
|
|
|
|
2020-10-12 07:07:56 +00:00
|
|
|
fmt.Printf("\tCPU: [%s] %d/%d core(s) in use\n",
|
2021-09-09 21:41:59 +00:00
|
|
|
barString(float64(stat.Info.Resources.CPUs), 0, float64(stat.CpuUse)), stat.CpuUse, stat.Info.Resources.CPUs)
|
|
|
|
|
|
|
|
ramTotal := stat.Info.Resources.MemPhysical
|
|
|
|
ramTasks := stat.MemUsedMin
|
|
|
|
ramUsed := stat.Info.Resources.MemUsed
|
|
|
|
var ramReserved uint64 = 0
|
|
|
|
if ramUsed > ramTasks {
|
|
|
|
ramReserved = ramUsed - ramTasks
|
2021-08-17 12:07:32 +00:00
|
|
|
}
|
2021-09-09 21:41:59 +00:00
|
|
|
ramBar := barString(float64(ramTotal), float64(ramReserved), float64(ramTasks))
|
2020-05-01 12:06:19 +00:00
|
|
|
|
|
|
|
fmt.Printf("\tRAM: [%s] %d%% %s/%s\n", ramBar,
|
2021-09-09 21:41:59 +00:00
|
|
|
(ramTasks+ramReserved)*100/stat.Info.Resources.MemPhysical,
|
|
|
|
types.SizeStr(types.NewInt(ramTasks+ramUsed)),
|
2020-05-01 12:06:19 +00:00
|
|
|
types.SizeStr(types.NewInt(stat.Info.Resources.MemPhysical)))
|
|
|
|
|
2021-09-09 21:41:59 +00:00
|
|
|
vmemTotal := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
|
|
|
|
vmemTasks := stat.MemUsedMax
|
|
|
|
vmemUsed := stat.Info.Resources.MemUsed + stat.Info.Resources.MemSwapUsed
|
|
|
|
var vmemReserved uint64 = 0
|
|
|
|
if vmemUsed > vmemTasks {
|
|
|
|
vmemReserved = vmemUsed - vmemTasks
|
|
|
|
}
|
|
|
|
vmemBar := barString(float64(vmemTotal), float64(vmemReserved), float64(vmemTasks))
|
|
|
|
|
2020-05-01 12:06:19 +00:00
|
|
|
fmt.Printf("\tVMEM: [%s] %d%% %s/%s\n", vmemBar,
|
2021-09-09 21:41:59 +00:00
|
|
|
(vmemTasks+vmemReserved)*100/vmemTotal,
|
|
|
|
types.SizeStr(types.NewInt(vmemTasks+vmemReserved)),
|
|
|
|
types.SizeStr(types.NewInt(vmemTotal)))
|
2020-03-23 14:56:22 +00:00
|
|
|
|
2021-09-01 01:59:25 +00:00
|
|
|
if len(stat.Info.Resources.GPUs) > 0 {
|
|
|
|
gpuBar := barString(float64(len(stat.Info.Resources.GPUs)), 0, stat.GpuUsed)
|
|
|
|
fmt.Printf("\tGPU: [%s] %.f%% %.2f/%d gpu(s) in use\n", color.GreenString(gpuBar),
|
|
|
|
stat.GpuUsed*100/float64(len(stat.Info.Resources.GPUs)),
|
|
|
|
stat.GpuUsed, len(stat.Info.Resources.GPUs))
|
|
|
|
}
|
2020-03-23 14:56:22 +00:00
|
|
|
for _, gpu := range stat.Info.Resources.GPUs {
|
2020-05-01 12:06:19 +00:00
|
|
|
fmt.Printf("\tGPU: %s\n", color.New(gpuCol).Sprintf("%s, %sused", gpu, gpuUse))
|
2020-03-23 14:56:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
},
|
2020-03-23 22:43:38 +00:00
|
|
|
}
|
2020-07-21 18:07:49 +00:00
|
|
|
|
|
|
|
var sealingJobsCmd = &cli.Command{
|
|
|
|
Name: "jobs",
|
2020-11-11 16:39:12 +00:00
|
|
|
Usage: "list running jobs",
|
2020-07-21 18:07:49 +00:00
|
|
|
Flags: []cli.Flag{
|
2021-07-07 16:12:17 +00:00
|
|
|
&cli.BoolFlag{
|
2021-07-08 08:44:13 +00:00
|
|
|
Name: "color",
|
2021-07-13 10:19:55 +00:00
|
|
|
Usage: "use color in display output",
|
2021-07-08 08:44:13 +00:00
|
|
|
DefaultText: "depends on output being a TTY",
|
2021-07-07 16:12:17 +00:00
|
|
|
},
|
2020-11-11 15:48:04 +00:00
|
|
|
&cli.BoolFlag{
|
2020-11-11 16:39:31 +00:00
|
|
|
Name: "show-ret-done",
|
2020-11-11 15:48:04 +00:00
|
|
|
Usage: "show returned but not consumed calls",
|
|
|
|
},
|
2020-07-21 18:07:49 +00:00
|
|
|
},
|
|
|
|
Action: func(cctx *cli.Context) error {
|
2021-07-13 10:19:55 +00:00
|
|
|
if cctx.IsSet("color") {
|
|
|
|
color.NoColor = !cctx.Bool("color")
|
|
|
|
}
|
2020-07-21 18:07:49 +00:00
|
|
|
|
|
|
|
nodeApi, closer, err := lcli.GetStorageMinerAPI(cctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer closer()
|
|
|
|
|
|
|
|
ctx := lcli.ReqContext(cctx)
|
|
|
|
|
|
|
|
jobs, err := nodeApi.WorkerJobs(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("getting worker jobs: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
type line struct {
|
|
|
|
storiface.WorkerJob
|
2020-10-18 10:35:44 +00:00
|
|
|
wid uuid.UUID
|
2020-07-21 18:07:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
lines := make([]line, 0)
|
|
|
|
|
|
|
|
for wid, jobs := range jobs {
|
|
|
|
for _, job := range jobs {
|
|
|
|
lines = append(lines, line{
|
|
|
|
WorkerJob: job,
|
|
|
|
wid: wid,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// oldest first
|
|
|
|
sort.Slice(lines, func(i, j int) bool {
|
2020-08-27 21:14:33 +00:00
|
|
|
if lines[i].RunWait != lines[j].RunWait {
|
2020-08-28 16:26:17 +00:00
|
|
|
return lines[i].RunWait < lines[j].RunWait
|
2020-08-27 21:14:33 +00:00
|
|
|
}
|
2020-09-24 09:55:11 +00:00
|
|
|
if lines[i].Start.Equal(lines[j].Start) {
|
|
|
|
return lines[i].ID.ID.String() < lines[j].ID.ID.String()
|
|
|
|
}
|
2020-07-21 18:07:49 +00:00
|
|
|
return lines[i].Start.Before(lines[j].Start)
|
|
|
|
})
|
|
|
|
|
2020-10-18 10:35:44 +00:00
|
|
|
workerHostnames := map[uuid.UUID]string{}
|
2020-07-21 18:07:49 +00:00
|
|
|
|
|
|
|
wst, err := nodeApi.WorkerStats(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("getting worker stats: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for wid, st := range wst {
|
2020-10-18 10:35:44 +00:00
|
|
|
workerHostnames[wid] = st.Info.Hostname
|
2020-07-21 18:07:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
tw := tabwriter.NewWriter(os.Stdout, 2, 4, 2, ' ', 0)
|
2020-08-27 21:14:33 +00:00
|
|
|
_, _ = fmt.Fprintf(tw, "ID\tSector\tWorker\tHostname\tTask\tState\tTime\n")
|
2020-07-21 18:07:49 +00:00
|
|
|
|
|
|
|
for _, l := range lines {
|
2020-08-28 16:26:17 +00:00
|
|
|
state := "running"
|
2020-11-09 22:13:29 +00:00
|
|
|
switch {
|
2021-10-15 19:26:35 +00:00
|
|
|
case l.RunWait > 1:
|
2020-08-28 16:26:17 +00:00
|
|
|
state = fmt.Sprintf("assigned(%d)", l.RunWait-1)
|
2021-10-15 19:26:35 +00:00
|
|
|
case l.RunWait == storiface.RWPrepared:
|
|
|
|
state = "prepared"
|
2020-11-09 22:38:20 +00:00
|
|
|
case l.RunWait == storiface.RWRetDone:
|
2020-11-11 15:48:04 +00:00
|
|
|
if !cctx.Bool("show-ret-done") {
|
|
|
|
continue
|
|
|
|
}
|
2020-11-09 22:38:20 +00:00
|
|
|
state = "ret-done"
|
|
|
|
case l.RunWait == storiface.RWReturned:
|
2020-11-09 22:13:29 +00:00
|
|
|
state = "returned"
|
2020-11-09 22:38:20 +00:00
|
|
|
case l.RunWait == storiface.RWRetWait:
|
2020-09-23 17:26:26 +00:00
|
|
|
state = "ret-wait"
|
|
|
|
}
|
|
|
|
dur := "n/a"
|
|
|
|
if !l.Start.IsZero() {
|
|
|
|
dur = time.Now().Sub(l.Start).Truncate(time.Millisecond * 100).String()
|
|
|
|
}
|
|
|
|
|
2020-11-09 22:09:04 +00:00
|
|
|
hostname, ok := workerHostnames[l.wid]
|
|
|
|
if !ok {
|
|
|
|
hostname = l.Hostname
|
|
|
|
}
|
|
|
|
|
2020-10-18 11:03:17 +00:00
|
|
|
_, _ = fmt.Fprintf(tw, "%s\t%d\t%s\t%s\t%s\t%s\t%s\n",
|
2020-11-11 16:39:12 +00:00
|
|
|
hex.EncodeToString(l.ID.ID[:4]),
|
2020-10-18 11:03:17 +00:00
|
|
|
l.Sector.Number,
|
2020-11-11 16:39:12 +00:00
|
|
|
hex.EncodeToString(l.wid[:4]),
|
2020-11-09 22:09:04 +00:00
|
|
|
hostname,
|
2020-10-18 11:03:17 +00:00
|
|
|
l.Task.Short(),
|
|
|
|
state,
|
|
|
|
dur)
|
2020-07-21 18:07:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return tw.Flush()
|
|
|
|
},
|
|
|
|
}
|
2020-07-27 11:23:43 +00:00
|
|
|
|
|
|
|
var sealingSchedDiagCmd = &cli.Command{
|
|
|
|
Name: "sched-diag",
|
|
|
|
Usage: "Dump internal scheduler state",
|
2020-10-30 10:07:35 +00:00
|
|
|
Flags: []cli.Flag{
|
|
|
|
&cli.BoolFlag{
|
|
|
|
Name: "force-sched",
|
|
|
|
},
|
|
|
|
},
|
2020-07-27 11:23:43 +00:00
|
|
|
Action: func(cctx *cli.Context) error {
|
|
|
|
nodeApi, closer, err := lcli.GetStorageMinerAPI(cctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer closer()
|
|
|
|
|
|
|
|
ctx := lcli.ReqContext(cctx)
|
|
|
|
|
2020-10-30 10:07:35 +00:00
|
|
|
st, err := nodeApi.SealingSchedDiag(ctx, cctx.Bool("force-sched"))
|
2020-07-27 11:23:43 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
j, err := json.MarshalIndent(&st, "", " ")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Println(string(j))
|
|
|
|
|
|
|
|
return nil
|
|
|
|
},
|
|
|
|
}
|
2020-11-11 16:39:12 +00:00
|
|
|
|
|
|
|
var sealingAbortCmd = &cli.Command{
|
2020-11-11 16:39:31 +00:00
|
|
|
Name: "abort",
|
|
|
|
Usage: "Abort a running job",
|
2020-11-12 07:04:55 +00:00
|
|
|
ArgsUsage: "[callid]",
|
2020-11-11 16:39:12 +00:00
|
|
|
Action: func(cctx *cli.Context) error {
|
|
|
|
if cctx.Args().Len() != 1 {
|
|
|
|
return xerrors.Errorf("expected 1 argument")
|
|
|
|
}
|
|
|
|
|
|
|
|
nodeApi, closer, err := lcli.GetStorageMinerAPI(cctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer closer()
|
|
|
|
|
|
|
|
ctx := lcli.ReqContext(cctx)
|
|
|
|
|
|
|
|
jobs, err := nodeApi.WorkerJobs(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return xerrors.Errorf("getting worker jobs: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var job *storiface.WorkerJob
|
|
|
|
outer:
|
|
|
|
for _, workerJobs := range jobs {
|
|
|
|
for _, j := range workerJobs {
|
|
|
|
if strings.HasPrefix(j.ID.ID.String(), cctx.Args().First()) {
|
2020-11-11 16:39:31 +00:00
|
|
|
j := j
|
2020-11-11 16:39:12 +00:00
|
|
|
job = &j
|
|
|
|
break outer
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if job == nil {
|
|
|
|
return xerrors.Errorf("job with specified id prefix not found")
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Printf("aborting job %s, task %s, sector %d, running on host %s\n", job.ID.String(), job.Task.Short(), job.Sector.Number, job.Hostname)
|
|
|
|
|
|
|
|
return nodeApi.SealingAbort(ctx, job.ID)
|
|
|
|
},
|
|
|
|
}
|