Report memory used and swap used in worker resources

Attempting to report "memory used by other processes" in the MemReserved
field does not work: the system-wide memory usage it is computed from
also includes the memory used by ongoing tasks.

To properly account for this, the worker should report the memory and
swap currently in use. The scheduler, which knows the memory requirements
of a task, can then determine whether enough memory is available for it.
This commit is contained in:
Clint Armstrong 2021-09-09 17:41:59 -04:00 committed by Łukasz Magiera
parent e2a1ca7caa
commit c4f46171ae
9 changed files with 91 additions and 74 deletions

View File

@ -231,8 +231,9 @@ func init() {
Hostname: "host", Hostname: "host",
Resources: storiface.WorkerResources{ Resources: storiface.WorkerResources{
MemPhysical: 256 << 30, MemPhysical: 256 << 30,
MemUsed: 2 << 30,
MemSwap: 120 << 30, MemSwap: 120 << 30,
MemReserved: 2 << 30, MemSwapUsed: 2 << 30,
CPUs: 64, CPUs: 64,
GPUs: []string{"aGPU 1337"}, GPUs: []string{"aGPU 1337"},
}, },

View File

@ -58,7 +58,7 @@ var (
FullAPIVersion1 = newVer(2, 1, 0) FullAPIVersion1 = newVer(2, 1, 0)
MinerAPIVersion0 = newVer(1, 2, 0) MinerAPIVersion0 = newVer(1, 2, 0)
WorkerAPIVersion0 = newVer(1, 1, 0) WorkerAPIVersion0 = newVer(1, 2, 0)
) )
//nolint:varcheck,deadcode //nolint:varcheck,deadcode

View File

@ -4,6 +4,7 @@ import (
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"os" "os"
"sort" "sort"
"strings" "strings"
@ -32,6 +33,17 @@ var sealingCmd = &cli.Command{
}, },
} }
var barCols = float64(64)
// barString renders a fixed-width usage bar: a yellow segment proportional to
// y/total, followed by a green segment proportional to g/total, followed by
// blank padding. The bar always contains exactly int(barCols) bar/space
// characters (not counting whatever the color helpers wrap around them).
//
// Segment lengths are clamped to the bar width. Without clamping, y+g > total,
// a negative input, or total == 0 would yield a negative repeat count, and
// strings.Repeat panics on negative counts.
func barString(total, y, g float64) string {
	width := int(barCols)

	// cols converts a share of total into a column count in [0, width].
	cols := func(v float64) int {
		frac := v / total
		switch {
		case math.IsNaN(frac) || frac <= 0:
			// 0/0, or a negative share: draw nothing.
			return 0
		case frac >= 1:
			// At or over capacity (including +Inf when total == 0).
			return width
		}
		return int(math.Round(frac * barCols))
	}

	yBars := cols(y)
	gBars := cols(g)
	// The yellow segment is drawn first; the green one gets what is left.
	if gBars > width-yBars {
		gBars = width - yBars
	}
	eBars := width - yBars - gBars

	return color.YellowString(strings.Repeat("|", yBars)) +
		color.GreenString(strings.Repeat("|", gBars)) +
		strings.Repeat(" ", eBars)
}
var sealingWorkersCmd = &cli.Command{ var sealingWorkersCmd = &cli.Command{
Name: "workers", Name: "workers",
Usage: "list workers", Usage: "list workers",
@ -89,55 +101,36 @@ var sealingWorkersCmd = &cli.Command{
fmt.Printf("Worker %s, host %s%s\n", stat.id, color.MagentaString(stat.Info.Hostname), disabled) fmt.Printf("Worker %s, host %s%s\n", stat.id, color.MagentaString(stat.Info.Hostname), disabled)
var barCols = uint64(64)
cpuBars := int(stat.CpuUse * barCols / stat.Info.Resources.CPUs)
cpuBar := strings.Repeat("|", cpuBars)
if int(barCols)-cpuBars >= 0 {
cpuBar += strings.Repeat(" ", int(barCols)-cpuBars)
}
fmt.Printf("\tCPU: [%s] %d/%d core(s) in use\n", fmt.Printf("\tCPU: [%s] %d/%d core(s) in use\n",
color.GreenString(cpuBar), stat.CpuUse, stat.Info.Resources.CPUs) barString(float64(stat.Info.Resources.CPUs), 0, float64(stat.CpuUse)), stat.CpuUse, stat.Info.Resources.CPUs)
ramBarsRes := int(stat.Info.Resources.MemReserved * barCols / stat.Info.Resources.MemPhysical) ramTotal := stat.Info.Resources.MemPhysical
ramBarsUsed := int(stat.MemUsedMin * barCols / stat.Info.Resources.MemPhysical) ramTasks := stat.MemUsedMin
ramRepeatSpace := int(barCols) - (ramBarsUsed + ramBarsRes) ramUsed := stat.Info.Resources.MemUsed
var ramReserved uint64 = 0
colorFunc := color.YellowString if ramUsed > ramTasks {
if ramRepeatSpace < 0 { ramReserved = ramUsed - ramTasks
ramRepeatSpace = 0
colorFunc = color.RedString
} }
ramBar := barString(float64(ramTotal), float64(ramReserved), float64(ramTasks))
ramBar := colorFunc(strings.Repeat("|", ramBarsRes)) +
color.GreenString(strings.Repeat("|", ramBarsUsed)) +
strings.Repeat(" ", ramRepeatSpace)
vmem := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
vmemBarsRes := int(stat.Info.Resources.MemReserved * barCols / vmem)
vmemBarsUsed := int(stat.MemUsedMax * barCols / vmem)
vmemRepeatSpace := int(barCols) - (vmemBarsUsed + vmemBarsRes)
colorFunc = color.YellowString
if vmemRepeatSpace < 0 {
vmemRepeatSpace = 0
colorFunc = color.RedString
}
vmemBar := colorFunc(strings.Repeat("|", vmemBarsRes)) +
color.GreenString(strings.Repeat("|", vmemBarsUsed)) +
strings.Repeat(" ", vmemRepeatSpace)
fmt.Printf("\tRAM: [%s] %d%% %s/%s\n", ramBar, fmt.Printf("\tRAM: [%s] %d%% %s/%s\n", ramBar,
(stat.Info.Resources.MemReserved+stat.MemUsedMin)*100/stat.Info.Resources.MemPhysical, (ramTasks+ramReserved)*100/stat.Info.Resources.MemPhysical,
types.SizeStr(types.NewInt(stat.Info.Resources.MemReserved+stat.MemUsedMin)), types.SizeStr(types.NewInt(ramTasks+ramUsed)),
types.SizeStr(types.NewInt(stat.Info.Resources.MemPhysical))) types.SizeStr(types.NewInt(stat.Info.Resources.MemPhysical)))
vmemTotal := stat.Info.Resources.MemPhysical + stat.Info.Resources.MemSwap
vmemTasks := stat.MemUsedMax
vmemUsed := stat.Info.Resources.MemUsed + stat.Info.Resources.MemSwapUsed
var vmemReserved uint64 = 0
if vmemUsed > vmemTasks {
vmemReserved = vmemUsed - vmemTasks
}
vmemBar := barString(float64(vmemTotal), float64(vmemReserved), float64(vmemTasks))
fmt.Printf("\tVMEM: [%s] %d%% %s/%s\n", vmemBar, fmt.Printf("\tVMEM: [%s] %d%% %s/%s\n", vmemBar,
(stat.Info.Resources.MemReserved+stat.MemUsedMax)*100/vmem, (vmemTasks+vmemReserved)*100/vmemTotal,
types.SizeStr(types.NewInt(stat.Info.Resources.MemReserved+stat.MemUsedMax)), types.SizeStr(types.NewInt(vmemTasks+vmemReserved)),
types.SizeStr(types.NewInt(vmem))) types.SizeStr(types.NewInt(vmemTotal)))
for _, gpu := range stat.Info.Resources.GPUs { for _, gpu := range stat.Info.Resources.GPUs {
fmt.Printf("\tGPU: %s\n", color.New(gpuCol).Sprintf("%s, %sused", gpu, gpuUse)) fmt.Printf("\tGPU: %s\n", color.New(gpuCol).Sprintf("%s, %sused", gpu, gpuUse))

View File

@ -58,8 +58,11 @@ var infoCmd = &cli.Command{
fmt.Printf("Hostname: %s\n", info.Hostname) fmt.Printf("Hostname: %s\n", info.Hostname)
fmt.Printf("CPUs: %d; GPUs: %v\n", info.Resources.CPUs, info.Resources.GPUs) fmt.Printf("CPUs: %d; GPUs: %v\n", info.Resources.CPUs, info.Resources.GPUs)
fmt.Printf("RAM: %s; Swap: %s\n", types.SizeStr(types.NewInt(info.Resources.MemPhysical)), types.SizeStr(types.NewInt(info.Resources.MemSwap))) fmt.Printf("RAM: %s/%s; Swap: %s/%s\n",
fmt.Printf("Reserved memory: %s\n", types.SizeStr(types.NewInt(info.Resources.MemReserved))) types.SizeStr(types.NewInt(info.Resources.MemUsed)),
types.SizeStr(types.NewInt(info.Resources.MemPhysical)),
types.SizeStr(types.NewInt(info.Resources.MemSwapUsed)),
types.SizeStr(types.NewInt(info.Resources.MemSwap)))
fmt.Printf("Task types: ") fmt.Printf("Task types: ")
for _, t := range ttList(tt) { for _, t := range ttList(tt) {

View File

@ -61,17 +61,26 @@ func (a *activeResources) canHandleRequest(needRes Resources, wid WorkerID, call
} }
res := info.Resources res := info.Resources
// TODO: dedupe needRes.BaseMinMemory per task type (don't add if that task is already running) // TODO: dedupe needRes.BaseMinMemory per task type (don't add if that task is already running)
minNeedMem := res.MemReserved + a.memUsedMin + needRes.MinMemory + needRes.BaseMinMemory memNeeded := needRes.MinMemory + needRes.BaseMinMemory
if minNeedMem > res.MemPhysical { memUsed := a.memUsedMin
log.Debugf("sched: not scheduling on worker %s for %s; not enough physical memory - need: %dM, have %dM", wid, caller, minNeedMem/mib, res.MemPhysical/mib) // assume that MemUsed can be swapped, so only check it in the vmem Check
memAvail := res.MemPhysical - memUsed
if memNeeded > memAvail {
log.Debugf("sched: not scheduling on worker %s for %s; not enough physical memory - need: %dM, have %dM available", wid, caller, memNeeded/mib, memAvail/mib)
return false return false
} }
maxNeedMem := res.MemReserved + a.memUsedMax + needRes.MaxMemory + needRes.BaseMinMemory vmemNeeded := needRes.MaxMemory + needRes.BaseMinMemory
vmemUsed := a.memUsedMax
if vmemUsed < res.MemUsed+res.MemSwapUsed {
vmemUsed = res.MemUsed + res.MemSwapUsed
}
vmemAvail := res.MemPhysical + res.MemSwap - vmemUsed
if maxNeedMem > res.MemSwap+res.MemPhysical { if vmemNeeded > vmemAvail {
log.Debugf("sched: not scheduling on worker %s for %s; not enough virtual memory - need: %dM, have %dM", wid, caller, maxNeedMem/mib, (res.MemSwap+res.MemPhysical)/mib) log.Debugf("sched: not scheduling on worker %s for %s; not enough virtual memory - need: %dM, have %dM available", wid, caller, vmemNeeded/mib, vmemAvail/mib)
return false return false
} }
@ -96,12 +105,21 @@ func (a *activeResources) utilization(wr storiface.WorkerResources) float64 {
cpu := float64(a.cpuUse) / float64(wr.CPUs) cpu := float64(a.cpuUse) / float64(wr.CPUs)
max = cpu max = cpu
memMin := float64(a.memUsedMin+wr.MemReserved) / float64(wr.MemPhysical) memUsed := a.memUsedMin
if memUsed < wr.MemUsed {
memUsed = wr.MemUsed
}
memMin := float64(memUsed) / float64(wr.MemPhysical)
if memMin > max { if memMin > max {
max = memMin max = memMin
} }
memMax := float64(a.memUsedMax+wr.MemReserved) / float64(wr.MemPhysical+wr.MemSwap) vmemUsed := a.memUsedMax
if a.memUsedMax < wr.MemUsed+wr.MemSwapUsed {
vmemUsed = wr.MemUsed + wr.MemSwapUsed
}
memMax := float64(vmemUsed) / float64(wr.MemPhysical+wr.MemSwap)
if memMax > max { if memMax > max {
max = memMax max = memMax
} }

View File

@ -41,14 +41,16 @@ func TestWithPriority(t *testing.T) {
var decentWorkerResources = storiface.WorkerResources{ var decentWorkerResources = storiface.WorkerResources{
MemPhysical: 128 << 30, MemPhysical: 128 << 30,
MemSwap: 200 << 30, MemSwap: 200 << 30,
MemReserved: 2 << 30, MemUsed: 1 << 30,
MemSwapUsed: 1 << 30,
CPUs: 32, CPUs: 32,
GPUs: []string{"a GPU"}, GPUs: []string{"a GPU"},
} }
var constrainedWorkerResources = storiface.WorkerResources{ var constrainedWorkerResources = storiface.WorkerResources{
MemPhysical: 1 << 30, MemPhysical: 1 << 30,
MemReserved: 2 << 30, MemUsed: 1 << 30,
MemSwapUsed: 1 << 30,
CPUs: 1, CPUs: 1,
} }

View File

@ -28,9 +28,9 @@ type WorkerInfo struct {
type WorkerResources struct { type WorkerResources struct {
MemPhysical uint64 MemPhysical uint64
MemUsed uint64
MemSwap uint64 MemSwap uint64
MemSwapUsed uint64
MemReserved uint64 // Used by system / other processes
CPUs uint64 // Logical cores CPUs uint64 // Logical cores
GPUs []string GPUs []string

View File

@ -108,8 +108,8 @@ func (t *testWorker) Info(ctx context.Context) (storiface.WorkerInfo, error) {
Hostname: "testworkerer", Hostname: "testworkerer",
Resources: storiface.WorkerResources{ Resources: storiface.WorkerResources{
MemPhysical: res.MinMemory * 3, MemPhysical: res.MinMemory * 3,
MemUsed: res.MinMemory,
MemSwap: 0, MemSwap: 0,
MemReserved: res.MinMemory,
CPUs: 32, CPUs: 32,
GPUs: nil, GPUs: nil,
}, },

View File

@ -482,51 +482,50 @@ func (l *LocalWorker) Paths(ctx context.Context) ([]stores.StoragePath, error) {
return l.localStore.Local(ctx) return l.localStore.Local(ctx)
} }
func (l *LocalWorker) memInfo() (memPhysical uint64, memVirtual uint64, memReserved uint64, err error) { func (l *LocalWorker) memInfo() (memPhysical, memUsed, memSwap, memSwapUsed uint64, err error) {
h, err := sysinfo.Host() h, err := sysinfo.Host()
if err != nil { if err != nil {
return 0, 0, 0, err return 0, 0, 0, 0, err
} }
mem, err := h.Memory() mem, err := h.Memory()
if err != nil { if err != nil {
return 0, 0, 0, err return 0, 0, 0, 0, err
} }
memPhysical = mem.Total memPhysical = mem.Total
memAvail := mem.Free // mem.Available is memory available without swapping, it is more relevant for this calculation
memSwap := mem.VirtualTotal memUsed = mem.Total - mem.Available
swapAvail := mem.VirtualFree memSwap = mem.VirtualTotal
memSwapUsed = mem.VirtualUsed
if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV1Mem(); err == nil { if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV1Mem(); err == nil {
if cgMemMax > 0 && cgMemMax < memPhysical { if cgMemMax > 0 && cgMemMax < memPhysical {
memPhysical = cgMemMax memPhysical = cgMemMax
memAvail = cgMemMax - cgMemUsed memUsed = cgMemUsed
} }
if cgSwapMax > 0 && cgSwapMax < memSwap { if cgSwapMax > 0 && cgSwapMax < memSwap {
memSwap = cgSwapMax memSwap = cgSwapMax
swapAvail = cgSwapMax - cgSwapUsed memSwapUsed = cgSwapUsed
} }
} }
if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV2Mem(); err == nil { if cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, err := cgroupV2Mem(); err == nil {
if cgMemMax > 0 && cgMemMax < memPhysical { if cgMemMax > 0 && cgMemMax < memPhysical {
memPhysical = cgMemMax memPhysical = cgMemMax
memAvail = cgMemMax - cgMemUsed memUsed = cgMemUsed
} }
if cgSwapMax > 0 && cgSwapMax < memSwap { if cgSwapMax > 0 && cgSwapMax < memSwap {
memSwap = cgSwapMax memSwap = cgSwapMax
swapAvail = cgSwapMax - cgSwapUsed memSwapUsed = cgSwapUsed
} }
} }
if l.noSwap { if l.noSwap {
memSwap = 0 memSwap = 0
swapAvail = 0 memSwapUsed = 0
} }
memReserved = memPhysical + memSwap - memAvail - swapAvail return memPhysical, memUsed, memSwap, memSwapUsed, nil
return memPhysical, memSwap, memReserved, nil
} }
func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) { func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
@ -540,7 +539,7 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
log.Errorf("getting gpu devices failed: %+v", err) log.Errorf("getting gpu devices failed: %+v", err)
} }
memPhysical, memSwap, memReserved, err := l.memInfo() memPhysical, memUsed, memSwap, memSwapUsed, err := l.memInfo()
if err != nil { if err != nil {
return storiface.WorkerInfo{}, xerrors.Errorf("getting memory info: %w", err) return storiface.WorkerInfo{}, xerrors.Errorf("getting memory info: %w", err)
} }
@ -550,8 +549,9 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
IgnoreResources: l.ignoreResources, IgnoreResources: l.ignoreResources,
Resources: storiface.WorkerResources{ Resources: storiface.WorkerResources{
MemPhysical: memPhysical, MemPhysical: memPhysical,
MemUsed: memUsed,
MemSwap: memSwap, MemSwap: memSwap,
MemReserved: memReserved, MemSwapUsed: memSwapUsed,
CPUs: uint64(runtime.NumCPU()), CPUs: uint64(runtime.NumCPU()),
GPUs: gpus, GPUs: gpus,
}, },