Report memory used and swap used in worker resources

Attempting to report "memory used by other processes" in the MemReserved
field fails to take into account the fact that the system's memory used
includes memory used by ongoing tasks.

To properly account for this, the worker should report the memory and swap
used, so that the scheduler, which is aware of the memory requirements for a
task, can determine whether there is sufficient memory available for it.
This commit is contained in:
Clint Armstrong 2021-09-09 17:41:59 -04:00 committed by Łukasz Magiera
parent e2a1ca7caa
commit c4f46171ae
9 changed files with 91 additions and 74 deletions

View File

@ -231,8 +231,9 @@ func init() {
Hostname: "host",
Resources: storiface.WorkerResources{
MemPhysical: 256 << 30,
MemUsed: 2 << 30,
MemSwap: 120 << 30,
MemReserved: 2 << 30,
MemSwapUsed: 2 << 30,
CPUs: 64,
GPUs: []string{"aGPU 1337"},
},

View File

@ -58,7 +58,7 @@ var (
FullAPIVersion1 = newVer(2, 1, 0)
MinerAPIVersion0 = newVer(1, 2, 0)
WorkerAPIVersion0 = newVer(1, 1, 0)
WorkerAPIVersion0 = newVer(1, 2, 0)
)
//nolint:varcheck,deadcode

View File

@ -4,6 +4,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"math"
"os"
"sort"
"strings"
@ -32,6 +33,17 @@ var sealingCmd = &cli.Command{
},
}
// barCols is the width, in characters, of the usage bars rendered by barString.
var barCols = float64(64)

// barString renders a usage bar that is always exactly barCols characters wide
// (not counting any color escape sequences added by the color package).
//
// total is the capacity the bar represents. y is the portion drawn as yellow
// "|" characters and g is the portion drawn as green "|" characters; the rest
// of the bar is padded with spaces.
//
// Each segment is clamped to the space still available, so the function never
// passes a negative count to strings.Repeat (which would panic). This covers
// y+g exceeding total, both segments rounding up past the bar width, and a
// zero, negative or non-finite total or share, all of which render as an
// empty or saturated segment instead of crashing.
func barString(total, y, g float64) string {
	width := int(barCols)

	// bars converts a share of total into a character count within [0, limit].
	bars := func(share float64, limit int) int {
		// Written as negated comparisons so that NaN inputs are rejected too.
		if limit <= 0 || !(total > 0) || !(share > 0) {
			return 0
		}
		n := math.Round(share / total * barCols)
		if math.IsNaN(n) || n <= 0 {
			return 0
		}
		// Compare as floats before converting: an out-of-range float-to-int
		// conversion (e.g. +Inf) has no defined result.
		if n >= float64(limit) {
			return limit
		}
		return int(n)
	}

	yBars := bars(y, width)
	// The green segment may only use what the yellow segment left over.
	gBars := bars(g, width-yBars)
	eBars := width - yBars - gBars

	return color.YellowString(strings.Repeat("|", yBars)) +
		color.GreenString(strings.Repeat("|", gBars)) +
		strings.Repeat(" ", eBars)
}
var sealingWorkersCmd = &cli.Command{
Name: "workers",
Usage: "list workers",
@ -89,55 +101,36 @@ var sealingWorkersCmd = &cli.Command{
fmt.Printf("Worker %s, host %s%s\n", stat.id, color.MagentaString(stat.Info.Hostname), disabled)
var barCols = uint64(64)
cpuBars := int(stat.CpuUse * barCols / stat.Info.Resources.CPUs)
cpuBar := strings.Repeat("|", cpuBars)
if int(barCols)-cpuBars >= 0 {
cpuBar += strings.Repeat(" ", int(barCols)-cpuBars)
}
fmt.Printf("\tCPU: [%s] %d/%d core(s) in use\n",
color.GreenString(cpuBar), stat.CpuUse, stat.Info.Resources.CPUs)
barString(float64(stat.Info.Resources.CPUs), 0, float64(stat.CpuUse)), stat.CpuUse, stat.Info.Resources.CPUs)
ramBarsRes := int(stat.Info.Resources.MemReserved * barCols / stat.Info.Resources.MemPhysical)
ramBarsUsed := int(stat.MemUsedMin * barCols / stat.Info.Resources.MemPhysical)
ramRepeatSpace := int(barCols) - (ramBarsUsed + ramBarsRes)
colorFunc := color.YellowString
if ramRepeatSpace < 0 {
ramRepeatSpace = 0
colorFunc = color.RedString
ramTotal := stat.Info.Resources.MemPhysical
ramTasks := stat.MemUsedMin
ramUsed := stat.Info.Resources.MemUsed
var ramReserved uint64 = 0
if ramUsed > ramTasks {
ramReserved = ramUsed - ramTasks
}
ramBar := colorFunc(strings.Repeat("|", ramBarsRes)) +
color.GreenString(strings.Repeat("|", ramBarsUsed)) +
strings.Repeat(" ", ramRepeatSpace)
vmem := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
vmemBarsRes := int(stat.Info.Resources.MemReserved * barCols / vmem)
vmemBarsUsed := int(stat.MemUsedMax * barCols / vmem)
vmemRepeatSpace := int(barCols) - (vmemBarsUsed + vmemBarsRes)
colorFunc = color.YellowString
if vmemRepeatSpace < 0 {
vmemRepeatSpace = 0
colorFunc = color.RedString
}
vmemBar := colorFunc(strings.Repeat("|", vmemBarsRes)) +
color.GreenString(strings.Repeat("|", vmemBarsUsed)) +
strings.Repeat(" ", vmemRepeatSpace)
ramBar := barString(float64(ramTotal), float64(ramReserved), float64(ramTasks))
fmt.Printf("\tRAM: [%s] %d%% %s/%s\n", ramBar,
(stat.Info.Resources.MemReserved+stat.MemUsedMin)*100/stat.Info.Resources.MemPhysical,
types.SizeStr(types.NewInt(stat.Info.Resources.MemReserved+stat.MemUsedMin)),
(ramTasks+ramReserved)*100/stat.Info.Resources.MemPhysical,
types.SizeStr(types.NewInt(ramTasks+ramUsed)),
types.SizeStr(types.NewInt(stat.Info.Resources.MemPhysical)))
vmemTotal := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
vmemTasks := stat.MemUsedMax
vmemUsed := stat.Info.Resources.MemUsed + stat.Info.Resources.MemSwapUsed
var vmemReserved uint64 = 0
if vmemUsed > vmemTasks {
vmemReserved = vmemUsed - vmemTasks
}
vmemBar := barString(float64(vmemTotal), float64(vmemReserved), float64(vmemTasks))
fmt.Printf("\tVMEM: [%s] %d%% %s/%s\n", vmemBar,
(stat.Info.Resources.MemReserved+stat.MemUsedMax)*100/vmem,
types.SizeStr(types.NewInt(stat.Info.Resources.MemReserved+stat.MemUsedMax)),
types.SizeStr(types.NewInt(vmem)))
(vmemTasks+vmemReserved)*100/vmemTotal,
types.SizeStr(types.NewInt(vmemTasks+vmemReserved)),
types.SizeStr(types.NewInt(vmemTotal)))
for _, gpu := range stat.Info.Resources.GPUs {
fmt.Printf("\tGPU: %s\n", color.New(gpuCol).Sprintf("%s, %sused", gpu, gpuUse))

View File

@ -58,8 +58,11 @@ var infoCmd = &cli.Command{
fmt.Printf("Hostname: %s\n", info.Hostname)
fmt.Printf("CPUs: %d; GPUs: %v\n", info.Resources.CPUs, info.Resources.GPUs)
fmt.Printf("RAM: %s; Swap: %s\n", types.SizeStr(types.NewInt(info.Resources.MemPhysical)), types.SizeStr(types.NewInt(info.Resources.MemSwap)))
fmt.Printf("Reserved memory: %s\n", types.SizeStr(types.NewInt(info.Resources.MemReserved)))
fmt.Printf("RAM: %s/%s; Swap: %s/%s\n",
types.SizeStr(types.NewInt(info.Resources.MemUsed)),
types.SizeStr(types.NewInt(info.Resources.MemPhysical)),
types.SizeStr(types.NewInt(info.Resources.MemSwapUsed)),
types.SizeStr(types.NewInt(info.Resources.MemSwap)))
fmt.Printf("Task types: ")
for _, t := range ttList(tt) {

View File

@ -61,17 +61,26 @@ func (a *activeResources) canHandleRequest(needRes Resources, wid WorkerID, call
}
res := info.Resources
// TODO: dedupe needRes.BaseMinMemory per task type (don't add if that task is already running)
minNeedMem := res.MemReserved + a.memUsedMin + needRes.MinMemory + needRes.BaseMinMemory
if minNeedMem > res.MemPhysical {
log.Debugf("sched: not scheduling on worker %s for %s; not enough physical memory - need: %dM, have %dM", wid, caller, minNeedMem/mib, res.MemPhysical/mib)
memNeeded := needRes.MinMemory + needRes.BaseMinMemory
memUsed := a.memUsedMin
// assume that MemUsed can be swapped, so only check it in the vmem Check
memAvail := res.MemPhysical - memUsed
if memNeeded > memAvail {
log.Debugf("sched: not scheduling on worker %s for %s; not enough physical memory - need: %dM, have %dM available", wid, caller, memNeeded/mib, memAvail/mib)
return false
}
maxNeedMem := res.MemReserved + a.memUsedMax + needRes.MaxMemory + needRes.BaseMinMemory
vmemNeeded := needRes.MaxMemory + needRes.BaseMinMemory
vmemUsed := a.memUsedMax
if vmemUsed < res.MemUsed+res.MemSwapUsed {
vmemUsed = res.MemUsed + res.MemSwapUsed
}
vmemAvail := res.MemPhysical + res.MemSwap - vmemUsed
if maxNeedMem > res.MemSwap+res.MemPhysical {
log.Debugf("sched: not scheduling on worker %s for %s; not enough virtual memory - need: %dM, have %dM", wid, caller, maxNeedMem/mib, (res.MemSwap+res.MemPhysical)/mib)
if vmemNeeded > vmemAvail {
log.Debugf("sched: not scheduling on worker %s for %s; not enough virtual memory - need: %dM, have %dM available", wid, caller, vmemNeeded/mib, vmemAvail/mib)
return false
}
@ -96,12 +105,21 @@ func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
cpu := float64(a.cpuUse) / float64(wr.CPUs)
max = cpu
memMin := float64(a.memUsedMin+wr.MemReserved) / float64(wr.MemPhysical)
memUsed := a.memUsedMin
if memUsed < wr.MemUsed {
memUsed = wr.MemUsed
}
memMin := float64(memUsed) / float64(wr.MemPhysical)
if memMin > max {
max = memMin
}
memMax := float64(a.memUsedMax+wr.MemReserved) / float64(wr.MemPhysical+wr.MemSwap)
vmemUsed := a.memUsedMax
if a.memUsedMax < wr.MemUsed+wr.MemSwapUsed {
vmemUsed = wr.MemUsed + wr.MemSwapUsed
}
memMax := float64(vmemUsed) / float64(wr.MemPhysical+wr.MemSwap)
if memMax > max {
max = memMax
}

View File

@ -41,14 +41,16 @@ func TestWithPriority(t *testing.T) {
var decentWorkerResources = storiface.WorkerResources{
MemPhysical: 128 << 30,
MemSwap: 200 << 30,
MemReserved: 2 << 30,
MemUsed: 1 << 30,
MemSwapUsed: 1 << 30,
CPUs: 32,
GPUs: []string{"a GPU"},
}
var constrainedWorkerResources = storiface.WorkerResources{
MemPhysical: 1 << 30,
MemReserved: 2 << 30,
MemUsed: 1 << 30,
MemSwapUsed: 1 << 30,
CPUs: 1,
}

View File

@ -28,9 +28,9 @@ type WorkerInfo struct {
type WorkerResources struct {
MemPhysical uint64
MemUsed uint64
MemSwap uint64
MemReserved uint64 // Used by system / other processes
MemSwapUsed uint64
CPUs uint64 // Logical cores
GPUs []string

View File

@ -108,8 +108,8 @@ func (t *testWorker) Info(ctx context.Context) (storiface.WorkerInfo, error) {
Hostname: "testworkerer",
Resources: storiface.WorkerResources{
MemPhysical: res.MinMemory * 3,
MemUsed: res.MinMemory,
MemSwap: 0,
MemReserved: res.MinMemory,
CPUs: 32,
GPUs: nil,
},

View File

@ -482,51 +482,50 @@ func (l *LocalWorker) Paths(ctx context.Context) ([]stores.StoragePath, error) {
return l.localStore.Local(ctx)
}
func (l *LocalWorker) memInfo() (memPhysical uint64, memVirtual uint64, memReserved uint64, err error) {
func (l *LocalWorker) memInfo() (memPhysical, memUsed, memSwap, memSwapUsed uint64, err error) {
h, err := sysinfo.Host()
if err != nil {
return 0, 0, 0, err
return 0, 0, 0, 0, err
}
mem, err := h.Memory()
if err != nil {
return 0, 0, 0, err
return 0, 0, 0, 0, err
}
memPhysical = mem.Total
memAvail := mem.Free
memSwap := mem.VirtualTotal
swapAvail := mem.VirtualFree
// mem.Available is memory available without swapping, it is more relevant for this calculation
memUsed = mem.Total - mem.Available
memSwap = mem.VirtualTotal
memSwapUsed = mem.VirtualUsed
if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV1Mem(); err == nil {
if cgMemMax > 0 && cgMemMax < memPhysical {
memPhysical = cgMemMax
memAvail = cgMemMax - cgMemUsed
memUsed = cgMemUsed
}
if cgSwapMax > 0 && cgSwapMax < memSwap {
memSwap = cgSwapMax
swapAvail = cgSwapMax - cgSwapUsed
memSwapUsed = cgSwapUsed
}
}
if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV2Mem(); err == nil {
if cgMemMax > 0 && cgMemMax < memPhysical {
memPhysical = cgMemMax
memAvail = cgMemMax - cgMemUsed
memUsed = cgMemUsed
}
if cgSwapMax > 0 && cgSwapMax < memSwap {
memSwap = cgSwapMax
swapAvail = cgSwapMax - cgSwapUsed
memSwapUsed = cgSwapUsed
}
}
if l.noSwap {
memSwap = 0
swapAvail = 0
memSwapUsed = 0
}
memReserved = memPhysical + memSwap - memAvail - swapAvail
return memPhysical, memSwap, memReserved, nil
return memPhysical, memUsed, memSwap, memSwapUsed, nil
}
func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
@ -540,7 +539,7 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
log.Errorf("getting gpu devices failed: %+v", err)
}
memPhysical, memSwap, memReserved, err := l.memInfo()
memPhysical, memUsed, memSwap, memSwapUsed, err := l.memInfo()
if err != nil {
return storiface.WorkerInfo{}, xerrors.Errorf("getting memory info: %w", err)
}
@ -550,8 +549,9 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
IgnoreResources: l.ignoreResources,
Resources: storiface.WorkerResources{
MemPhysical: memPhysical,
MemUsed: memUsed,
MemSwap: memSwap,
MemReserved: memReserved,
MemSwapUsed: memSwapUsed,
CPUs: uint64(runtime.NumCPU()),
GPUs: gpus,
},