Report memory used and swap used in worker res
Attempting to report "memory used by other processes" in the MemReserved field fails to take into account that the system's reported memory usage already includes memory used by ongoing tasks. To account for this properly, the worker should report the memory and swap actually in use; the scheduler, which knows the memory requirements of each task, can then determine whether there is sufficient memory available for a new task.
This commit is contained in:
parent
e2a1ca7caa
commit
c4f46171ae
@ -231,8 +231,9 @@ func init() {
|
||||
Hostname: "host",
|
||||
Resources: storiface.WorkerResources{
|
||||
MemPhysical: 256 << 30,
|
||||
MemUsed: 2 << 30,
|
||||
MemSwap: 120 << 30,
|
||||
MemReserved: 2 << 30,
|
||||
MemSwapUsed: 2 << 30,
|
||||
CPUs: 64,
|
||||
GPUs: []string{"aGPU 1337"},
|
||||
},
|
||||
|
@ -58,7 +58,7 @@ var (
|
||||
FullAPIVersion1 = newVer(2, 1, 0)
|
||||
|
||||
MinerAPIVersion0 = newVer(1, 2, 0)
|
||||
WorkerAPIVersion0 = newVer(1, 1, 0)
|
||||
WorkerAPIVersion0 = newVer(1, 2, 0)
|
||||
)
|
||||
|
||||
//nolint:varcheck,deadcode
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
@ -32,6 +33,17 @@ var sealingCmd = &cli.Command{
|
||||
},
|
||||
}
|
||||
|
||||
var barCols = float64(64)
|
||||
|
||||
func barString(total, y, g float64) string {
|
||||
yBars := int(math.Round(y / total * barCols))
|
||||
gBars := int(math.Round(g / total * barCols))
|
||||
eBars := int(barCols) - yBars - gBars
|
||||
return color.YellowString(strings.Repeat("|", yBars)) +
|
||||
color.GreenString(strings.Repeat("|", gBars)) +
|
||||
strings.Repeat(" ", eBars)
|
||||
}
|
||||
|
||||
var sealingWorkersCmd = &cli.Command{
|
||||
Name: "workers",
|
||||
Usage: "list workers",
|
||||
@ -89,55 +101,36 @@ var sealingWorkersCmd = &cli.Command{
|
||||
|
||||
fmt.Printf("Worker %s, host %s%s\n", stat.id, color.MagentaString(stat.Info.Hostname), disabled)
|
||||
|
||||
var barCols = uint64(64)
|
||||
cpuBars := int(stat.CpuUse * barCols / stat.Info.Resources.CPUs)
|
||||
cpuBar := strings.Repeat("|", cpuBars)
|
||||
if int(barCols)-cpuBars >= 0 {
|
||||
cpuBar += strings.Repeat(" ", int(barCols)-cpuBars)
|
||||
}
|
||||
|
||||
fmt.Printf("\tCPU: [%s] %d/%d core(s) in use\n",
|
||||
color.GreenString(cpuBar), stat.CpuUse, stat.Info.Resources.CPUs)
|
||||
barString(float64(stat.Info.Resources.CPUs), 0, float64(stat.CpuUse)), stat.CpuUse, stat.Info.Resources.CPUs)
|
||||
|
||||
ramBarsRes := int(stat.Info.Resources.MemReserved * barCols / stat.Info.Resources.MemPhysical)
|
||||
ramBarsUsed := int(stat.MemUsedMin * barCols / stat.Info.Resources.MemPhysical)
|
||||
ramRepeatSpace := int(barCols) - (ramBarsUsed + ramBarsRes)
|
||||
|
||||
colorFunc := color.YellowString
|
||||
if ramRepeatSpace < 0 {
|
||||
ramRepeatSpace = 0
|
||||
colorFunc = color.RedString
|
||||
ramTotal := stat.Info.Resources.MemPhysical
|
||||
ramTasks := stat.MemUsedMin
|
||||
ramUsed := stat.Info.Resources.MemUsed
|
||||
var ramReserved uint64 = 0
|
||||
if ramUsed > ramTasks {
|
||||
ramReserved = ramUsed - ramTasks
|
||||
}
|
||||
|
||||
ramBar := colorFunc(strings.Repeat("|", ramBarsRes)) +
|
||||
color.GreenString(strings.Repeat("|", ramBarsUsed)) +
|
||||
strings.Repeat(" ", ramRepeatSpace)
|
||||
|
||||
vmem := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
|
||||
|
||||
vmemBarsRes := int(stat.Info.Resources.MemReserved * barCols / vmem)
|
||||
vmemBarsUsed := int(stat.MemUsedMax * barCols / vmem)
|
||||
vmemRepeatSpace := int(barCols) - (vmemBarsUsed + vmemBarsRes)
|
||||
|
||||
colorFunc = color.YellowString
|
||||
if vmemRepeatSpace < 0 {
|
||||
vmemRepeatSpace = 0
|
||||
colorFunc = color.RedString
|
||||
}
|
||||
|
||||
vmemBar := colorFunc(strings.Repeat("|", vmemBarsRes)) +
|
||||
color.GreenString(strings.Repeat("|", vmemBarsUsed)) +
|
||||
strings.Repeat(" ", vmemRepeatSpace)
|
||||
ramBar := barString(float64(ramTotal), float64(ramReserved), float64(ramTasks))
|
||||
|
||||
fmt.Printf("\tRAM: [%s] %d%% %s/%s\n", ramBar,
|
||||
(stat.Info.Resources.MemReserved+stat.MemUsedMin)*100/stat.Info.Resources.MemPhysical,
|
||||
types.SizeStr(types.NewInt(stat.Info.Resources.MemReserved+stat.MemUsedMin)),
|
||||
(ramTasks+ramReserved)*100/stat.Info.Resources.MemPhysical,
|
||||
types.SizeStr(types.NewInt(ramTasks+ramUsed)),
|
||||
types.SizeStr(types.NewInt(stat.Info.Resources.MemPhysical)))
|
||||
|
||||
vmemTotal := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
|
||||
vmemTasks := stat.MemUsedMax
|
||||
vmemUsed := stat.Info.Resources.MemUsed + stat.Info.Resources.MemSwapUsed
|
||||
var vmemReserved uint64 = 0
|
||||
if vmemUsed > vmemTasks {
|
||||
vmemReserved = vmemUsed - vmemTasks
|
||||
}
|
||||
vmemBar := barString(float64(vmemTotal), float64(vmemReserved), float64(vmemTasks))
|
||||
|
||||
fmt.Printf("\tVMEM: [%s] %d%% %s/%s\n", vmemBar,
|
||||
(stat.Info.Resources.MemReserved+stat.MemUsedMax)*100/vmem,
|
||||
types.SizeStr(types.NewInt(stat.Info.Resources.MemReserved+stat.MemUsedMax)),
|
||||
types.SizeStr(types.NewInt(vmem)))
|
||||
(vmemTasks+vmemReserved)*100/vmemTotal,
|
||||
types.SizeStr(types.NewInt(vmemTasks+vmemReserved)),
|
||||
types.SizeStr(types.NewInt(vmemTotal)))
|
||||
|
||||
for _, gpu := range stat.Info.Resources.GPUs {
|
||||
fmt.Printf("\tGPU: %s\n", color.New(gpuCol).Sprintf("%s, %sused", gpu, gpuUse))
|
||||
|
@ -58,8 +58,11 @@ var infoCmd = &cli.Command{
|
||||
|
||||
fmt.Printf("Hostname: %s\n", info.Hostname)
|
||||
fmt.Printf("CPUs: %d; GPUs: %v\n", info.Resources.CPUs, info.Resources.GPUs)
|
||||
fmt.Printf("RAM: %s; Swap: %s\n", types.SizeStr(types.NewInt(info.Resources.MemPhysical)), types.SizeStr(types.NewInt(info.Resources.MemSwap)))
|
||||
fmt.Printf("Reserved memory: %s\n", types.SizeStr(types.NewInt(info.Resources.MemReserved)))
|
||||
fmt.Printf("RAM: %s/%s; Swap: %s/%s\n",
|
||||
types.SizeStr(types.NewInt(info.Resources.MemUsed)),
|
||||
types.SizeStr(types.NewInt(info.Resources.MemPhysical)),
|
||||
types.SizeStr(types.NewInt(info.Resources.MemSwapUsed)),
|
||||
types.SizeStr(types.NewInt(info.Resources.MemSwap)))
|
||||
|
||||
fmt.Printf("Task types: ")
|
||||
for _, t := range ttList(tt) {
|
||||
|
34
extern/sector-storage/sched_resources.go
vendored
34
extern/sector-storage/sched_resources.go
vendored
@ -61,17 +61,26 @@ func (a *activeResources) canHandleRequest(needRes Resources, wid WorkerID, call
|
||||
}
|
||||
|
||||
res := info.Resources
|
||||
|
||||
// TODO: dedupe needRes.BaseMinMemory per task type (don't add if that task is already running)
|
||||
minNeedMem := res.MemReserved + a.memUsedMin + needRes.MinMemory + needRes.BaseMinMemory
|
||||
if minNeedMem > res.MemPhysical {
|
||||
log.Debugf("sched: not scheduling on worker %s for %s; not enough physical memory - need: %dM, have %dM", wid, caller, minNeedMem/mib, res.MemPhysical/mib)
|
||||
memNeeded := needRes.MinMemory + needRes.BaseMinMemory
|
||||
memUsed := a.memUsedMin
|
||||
// assume that MemUsed can be swapped, so only check it in the vmem Check
|
||||
memAvail := res.MemPhysical - memUsed
|
||||
if memNeeded > memAvail {
|
||||
log.Debugf("sched: not scheduling on worker %s for %s; not enough physical memory - need: %dM, have %dM available", wid, caller, memNeeded/mib, memAvail/mib)
|
||||
return false
|
||||
}
|
||||
|
||||
maxNeedMem := res.MemReserved + a.memUsedMax + needRes.MaxMemory + needRes.BaseMinMemory
|
||||
vmemNeeded := needRes.MaxMemory + needRes.BaseMinMemory
|
||||
vmemUsed := a.memUsedMax
|
||||
if vmemUsed < res.MemUsed+res.MemSwapUsed {
|
||||
vmemUsed = res.MemUsed + res.MemSwapUsed
|
||||
}
|
||||
vmemAvail := res.MemPhysical + res.MemSwap - vmemUsed
|
||||
|
||||
if maxNeedMem > res.MemSwap+res.MemPhysical {
|
||||
log.Debugf("sched: not scheduling on worker %s for %s; not enough virtual memory - need: %dM, have %dM", wid, caller, maxNeedMem/mib, (res.MemSwap+res.MemPhysical)/mib)
|
||||
if vmemNeeded > vmemAvail {
|
||||
log.Debugf("sched: not scheduling on worker %s for %s; not enough virtual memory - need: %dM, have %dM available", wid, caller, vmemNeeded/mib, vmemAvail/mib)
|
||||
return false
|
||||
}
|
||||
|
||||
@ -96,12 +105,21 @@ func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
|
||||
cpu := float64(a.cpuUse) / float64(wr.CPUs)
|
||||
max = cpu
|
||||
|
||||
memMin := float64(a.memUsedMin+wr.MemReserved) / float64(wr.MemPhysical)
|
||||
memUsed := a.memUsedMin
|
||||
if memUsed < wr.MemUsed {
|
||||
memUsed = wr.MemUsed
|
||||
}
|
||||
memMin := float64(memUsed) / float64(wr.MemPhysical)
|
||||
if memMin > max {
|
||||
max = memMin
|
||||
}
|
||||
|
||||
memMax := float64(a.memUsedMax+wr.MemReserved) / float64(wr.MemPhysical+wr.MemSwap)
|
||||
vmemUsed := a.memUsedMax
|
||||
if a.memUsedMax < wr.MemUsed+wr.MemSwapUsed {
|
||||
vmemUsed = wr.MemUsed + wr.MemSwapUsed
|
||||
}
|
||||
memMax := float64(vmemUsed) / float64(wr.MemPhysical+wr.MemSwap)
|
||||
|
||||
if memMax > max {
|
||||
max = memMax
|
||||
}
|
||||
|
6
extern/sector-storage/sched_test.go
vendored
6
extern/sector-storage/sched_test.go
vendored
@ -41,14 +41,16 @@ func TestWithPriority(t *testing.T) {
|
||||
var decentWorkerResources = storiface.WorkerResources{
|
||||
MemPhysical: 128 << 30,
|
||||
MemSwap: 200 << 30,
|
||||
MemReserved: 2 << 30,
|
||||
MemUsed: 1 << 30,
|
||||
MemSwapUsed: 1 << 30,
|
||||
CPUs: 32,
|
||||
GPUs: []string{"a GPU"},
|
||||
}
|
||||
|
||||
var constrainedWorkerResources = storiface.WorkerResources{
|
||||
MemPhysical: 1 << 30,
|
||||
MemReserved: 2 << 30,
|
||||
MemUsed: 1 << 30,
|
||||
MemSwapUsed: 1 << 30,
|
||||
CPUs: 1,
|
||||
}
|
||||
|
||||
|
4
extern/sector-storage/storiface/worker.go
vendored
4
extern/sector-storage/storiface/worker.go
vendored
@ -28,9 +28,9 @@ type WorkerInfo struct {
|
||||
|
||||
type WorkerResources struct {
|
||||
MemPhysical uint64
|
||||
MemUsed uint64
|
||||
MemSwap uint64
|
||||
|
||||
MemReserved uint64 // Used by system / other processes
|
||||
MemSwapUsed uint64
|
||||
|
||||
CPUs uint64 // Logical cores
|
||||
GPUs []string
|
||||
|
2
extern/sector-storage/testworker_test.go
vendored
2
extern/sector-storage/testworker_test.go
vendored
@ -108,8 +108,8 @@ func (t *testWorker) Info(ctx context.Context) (storiface.WorkerInfo, error) {
|
||||
Hostname: "testworkerer",
|
||||
Resources: storiface.WorkerResources{
|
||||
MemPhysical: res.MinMemory * 3,
|
||||
MemUsed: res.MinMemory,
|
||||
MemSwap: 0,
|
||||
MemReserved: res.MinMemory,
|
||||
CPUs: 32,
|
||||
GPUs: nil,
|
||||
},
|
||||
|
32
extern/sector-storage/worker_local.go
vendored
32
extern/sector-storage/worker_local.go
vendored
@ -482,51 +482,50 @@ func (l *LocalWorker) Paths(ctx context.Context) ([]stores.StoragePath, error) {
|
||||
return l.localStore.Local(ctx)
|
||||
}
|
||||
|
||||
func (l *LocalWorker) memInfo() (memPhysical uint64, memVirtual uint64, memReserved uint64, err error) {
|
||||
func (l *LocalWorker) memInfo() (memPhysical, memUsed, memSwap, memSwapUsed uint64, err error) {
|
||||
h, err := sysinfo.Host()
|
||||
if err != nil {
|
||||
return 0, 0, 0, err
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
|
||||
mem, err := h.Memory()
|
||||
if err != nil {
|
||||
return 0, 0, 0, err
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
memPhysical = mem.Total
|
||||
memAvail := mem.Free
|
||||
memSwap := mem.VirtualTotal
|
||||
swapAvail := mem.VirtualFree
|
||||
// mem.Available is memory available without swapping, it is more relevant for this calculation
|
||||
memUsed = mem.Total - mem.Available
|
||||
memSwap = mem.VirtualTotal
|
||||
memSwapUsed = mem.VirtualUsed
|
||||
|
||||
if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV1Mem(); err == nil {
|
||||
if cgMemMax > 0 && cgMemMax < memPhysical {
|
||||
memPhysical = cgMemMax
|
||||
memAvail = cgMemMax - cgMemUsed
|
||||
memUsed = cgMemUsed
|
||||
}
|
||||
if cgSwapMax > 0 && cgSwapMax < memSwap {
|
||||
memSwap = cgSwapMax
|
||||
swapAvail = cgSwapMax - cgSwapUsed
|
||||
memSwapUsed = cgSwapUsed
|
||||
}
|
||||
}
|
||||
|
||||
if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV2Mem(); err == nil {
|
||||
if cgMemMax > 0 && cgMemMax < memPhysical {
|
||||
memPhysical = cgMemMax
|
||||
memAvail = cgMemMax - cgMemUsed
|
||||
memUsed = cgMemUsed
|
||||
}
|
||||
if cgSwapMax > 0 && cgSwapMax < memSwap {
|
||||
memSwap = cgSwapMax
|
||||
swapAvail = cgSwapMax - cgSwapUsed
|
||||
memSwapUsed = cgSwapUsed
|
||||
}
|
||||
}
|
||||
|
||||
if l.noSwap {
|
||||
memSwap = 0
|
||||
swapAvail = 0
|
||||
memSwapUsed = 0
|
||||
}
|
||||
|
||||
memReserved = memPhysical + memSwap - memAvail - swapAvail
|
||||
|
||||
return memPhysical, memSwap, memReserved, nil
|
||||
return memPhysical, memUsed, memSwap, memSwapUsed, nil
|
||||
}
|
||||
|
||||
func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
|
||||
@ -540,7 +539,7 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
|
||||
log.Errorf("getting gpu devices failed: %+v", err)
|
||||
}
|
||||
|
||||
memPhysical, memSwap, memReserved, err := l.memInfo()
|
||||
memPhysical, memUsed, memSwap, memSwapUsed, err := l.memInfo()
|
||||
if err != nil {
|
||||
return storiface.WorkerInfo{}, xerrors.Errorf("getting memory info: %w", err)
|
||||
}
|
||||
@ -550,8 +549,9 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
|
||||
IgnoreResources: l.ignoreResources,
|
||||
Resources: storiface.WorkerResources{
|
||||
MemPhysical: memPhysical,
|
||||
MemUsed: memUsed,
|
||||
MemSwap: memSwap,
|
||||
MemReserved: memReserved,
|
||||
MemSwapUsed: memSwapUsed,
|
||||
CPUs: uint64(runtime.NumCPU()),
|
||||
GPUs: gpus,
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user