Use cgroup limits in worker memory calculations

Worker processes may have memory limits imposed by systemd, but
/proc/meminfo reports the entire system memory regardless of these limits.
As a result, the scheduler believes the worker has the entire system
memory available, and the worker is allocated too many tasks.

This change attempts to read cgroup memory limits for the worker
process. It supports cgroups v1 and v2, and compares cgroup limits
against the system memory and returns the most conservative values to
prevent the worker from being allocated too many tasks and potentially
triggering an OOM event.
This commit is contained in:
Clint Armstrong 2021-08-31 16:52:47 -04:00 committed by Łukasz Magiera
parent 19e808fffb
commit e2a1ca7caa
5 changed files with 181 additions and 13 deletions

12
extern/sector-storage/cgroups.go vendored Normal file
View File

@ -0,0 +1,12 @@
//go:build !linux
// +build !linux
package sectorstorage
// cgroupV1Mem is the stand-in used on builds that are not Linux. It reports
// zero for every limit and usage figure together with a nil error.
func cgroupV1Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
	// The named results already hold their zero values.
	return
}
// cgroupV2Mem is the stand-in used on builds that are not Linux. It reports
// zero for every limit and usage figure together with a nil error.
func cgroupV2Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
	// The named results already hold their zero values.
	return
}

117
extern/sector-storage/cgroups_linux.go vendored Normal file
View File

@ -0,0 +1,117 @@
//go:build linux
// +build linux
package sectorstorage
import (
"bufio"
"bytes"
"math"
"os"
"path/filepath"
"github.com/containerd/cgroups"
cgroupv2 "github.com/containerd/cgroups/v2"
)
// cgroupV2MountPoint returns the mount point of the first cgroup2 filesystem
// listed in /proc/self/mountinfo.
//
// A mountinfo line has the shape
//
//	ID parentID major:minor root mountPoint options [optional...] - fsType source superOptions
//
// The number of optional fields varies per mount, so the filesystem type is
// located relative to the "-" separator instead of at a fixed index.
//
// It returns cgroups.ErrMountPointNotExist when no cgroup2 mount is listed,
// and the underlying error when the file cannot be opened or read.
func cgroupV2MountPoint() (string, error) {
	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := bytes.Fields(scanner.Bytes())
		// Indices 0-5 are the fixed leading fields; the separator can appear
		// at index 6 at the earliest and must be followed by the fs type.
		for i := 6; i+1 < len(fields); i++ {
			if string(fields[i]) != "-" {
				continue
			}
			if string(fields[i+1]) == "cgroup2" {
				return string(fields[4]), nil
			}
			break
		}
	}
	// Distinguish a read failure from a genuinely absent mount.
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return "", cgroups.ErrMountPointNotExist
}
// cgroupV1Mem reads the cgroup v1 memory controller statistics for the
// current process.
//
// It returns the memory limit, the memory usage with file cache excluded, the
// swap limit and the swap usage, exactly as reported by the loaded cgroup's
// stats. When the stats carry no memory section all values are zero and err
// is nil. Errors from loading the cgroup or reading its stats are returned
// unchanged.
func cgroupV1Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
	path := cgroups.NestedPath("")
	if pid := os.Getpid(); pid == 1 {
		path = cgroups.RootPath
	}

	c, err := cgroups.Load(cgroups.SingleSubsystem(cgroups.V1, cgroups.Memory), path)
	if err != nil {
		return 0, 0, 0, 0, err
	}

	stats, err := c.Stat()
	if err != nil {
		return 0, 0, 0, 0, err
	}
	if stats.Memory == nil {
		return 0, 0, 0, 0, nil
	}

	if stats.Memory.Usage != nil {
		memoryMax = stats.Memory.Usage.Limit

		// Exclude cached files. The counters are unsigned, so clamp at zero
		// instead of wrapping around if the cache figures exceed the usage
		// figure.
		fileCache := stats.Memory.InactiveFile + stats.Memory.ActiveFile
		if usage := stats.Memory.Usage.Usage; usage > fileCache {
			memoryUsed = usage - fileCache
		}
	}
	if stats.Memory.Swap != nil {
		swapMax = stats.Memory.Swap.Limit
		swapUsed = stats.Memory.Swap.Usage
	}
	return memoryMax, memoryUsed, swapMax, swapUsed, nil
}
// cgroupV2MemFromPath reads the memory statistics of a single cgroup v2 group.
//
// mp and path are handed to cgroupv2.LoadManager as the mount point and the
// group path. The function returns the memory limit, the memory usage with
// file cache excluded, the swap limit and the swap usage. When the stats carry
// no memory section all values are zero and err is nil. Errors from loading
// the group or reading its stats are returned unchanged.
func cgroupV2MemFromPath(mp, path string) (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
	c, err := cgroupv2.LoadManager(mp, path)
	if err != nil {
		return 0, 0, 0, 0, err
	}

	stats, err := c.Stat()
	if err != nil {
		return 0, 0, 0, 0, err
	}

	if stats.Memory != nil {
		memoryMax = stats.Memory.UsageLimit

		// Exclude memory used caching files. The counters are unsigned, so
		// clamp at zero instead of wrapping around if the file figure exceeds
		// the usage figure.
		if stats.Memory.Usage > stats.Memory.File {
			memoryUsed = stats.Memory.Usage - stats.Memory.File
		}

		swapMax = stats.Memory.SwapLimit
		swapUsed = stats.Memory.SwapUsage
	}
	return memoryMax, memoryUsed, swapMax, swapUsed, nil
}
// cgroupV2Mem finds the tightest cgroup v2 memory and swap limits that apply
// to the current process.
//
// Starting at the process's own group path it visits every ancestor up to,
// but not including, "/". For memory and swap independently it keeps the
// smallest non-zero limit seen, paired with the usage reported by the group
// that imposes that limit. If no visited group reports a smaller limit, the
// corresponding max stays at math.MaxUint64 and the usage stays at zero.
//
// Any error while resolving the group path, locating the cgroup2 mount point
// or reading a group's stats aborts the walk and is returned with all values
// zero.
func cgroupV2Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
	memoryMax = math.MaxUint64
	swapMax = math.MaxUint64

	path, err := cgroupv2.PidGroupPath(os.Getpid())
	if err != nil {
		return 0, 0, 0, 0, err
	}

	mp, err := cgroupV2MountPoint()
	if err != nil {
		return 0, 0, 0, 0, err
	}

	// filepath.Dir converges to "/" for rooted paths and to "." for empty or
	// relative ones; stop at either fixed point so the walk always terminates.
	for path != "/" && path != "." {
		cgMemoryMax, cgMemoryUsed, cgSwapMax, cgSwapUsed, err := cgroupV2MemFromPath(mp, path)
		if err != nil {
			return 0, 0, 0, 0, err
		}
		if cgMemoryMax != 0 && cgMemoryMax < memoryMax {
			log.Debugf("memory limited by cgroup %s: %v", path, cgMemoryMax)
			memoryMax = cgMemoryMax
			memoryUsed = cgMemoryUsed
		}
		if cgSwapMax != 0 && cgSwapMax < swapMax {
			log.Debugf("swap limited by cgroup %s: %v", path, cgSwapMax)
			swapMax = cgSwapMax
			swapUsed = cgSwapUsed
		}
		path = filepath.Dir(path)
	}
	return memoryMax, memoryUsed, swapMax, swapUsed, nil
}

View File

@ -482,6 +482,53 @@ func (l *LocalWorker) Paths(ctx context.Context) ([]stores.StoragePath, error) {
return l.localStore.Local(ctx)
}
// memInfo computes the memory figures this worker reports.
//
// It starts from the host-wide values returned by sysinfo and then consults
// the cgroup v1 and cgroup v2 readers in turn. Whenever a reader reports a
// non-zero limit that is lower than the current total, the total is lowered to
// that limit and the available amount becomes limit minus usage (clamped at
// zero). Physical memory and swap are handled independently. When l.noSwap is
// set, swap is reported as zero.
//
// Results, in order: physical memory total, swap total, and reserved memory
// (physical + swap totals minus what is still available in each). An error is
// returned only when the host memory information cannot be obtained.
func (l *LocalWorker) memInfo() (memPhysical uint64, memVirtual uint64, memReserved uint64, err error) {
	h, err := sysinfo.Host()
	if err != nil {
		return 0, 0, 0, err
	}

	mem, err := h.Memory()
	if err != nil {
		return 0, 0, 0, err
	}

	memPhysical = mem.Total
	// Use Available rather than Free so reclaimable page cache is not counted
	// as reserved, matching the cgroup readers, which exclude file cache.
	memAvail := mem.Available
	memSwap := mem.VirtualTotal
	swapAvail := mem.VirtualFree

	cgroupReaders := []func() (uint64, uint64, uint64, uint64, error){cgroupV1Mem, cgroupV2Mem}
	for _, readCgroup := range cgroupReaders {
		cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed, cgErr := readCgroup()
		if cgErr != nil {
			// Best effort: a cgroup hierarchy that cannot be read simply
			// imposes no additional limit here.
			continue
		}

		if cgMemMax > 0 && cgMemMax < memPhysical {
			memPhysical = cgMemMax
			// Unsigned arithmetic: clamp instead of wrapping if usage is
			// reported above the limit.
			memAvail = 0
			if cgMemUsed < cgMemMax {
				memAvail = cgMemMax - cgMemUsed
			}
		}
		if cgSwapMax > 0 && cgSwapMax < memSwap {
			memSwap = cgSwapMax
			swapAvail = 0
			if cgSwapUsed < cgSwapMax {
				swapAvail = cgSwapMax - cgSwapUsed
			}
		}
	}

	if l.noSwap {
		memSwap = 0
		swapAvail = 0
	}

	memReserved = memPhysical + memSwap - memAvail - swapAvail

	return memPhysical, memSwap, memReserved, nil
}
func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
hostname, err := os.Hostname() // TODO: allow overriding from config
if err != nil {
@ -493,28 +540,18 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
log.Errorf("getting gpu devices failed: %+v", err)
}
h, err := sysinfo.Host()
if err != nil {
return storiface.WorkerInfo{}, xerrors.Errorf("getting host info: %w", err)
}
mem, err := h.Memory()
memPhysical, memSwap, memReserved, err := l.memInfo()
if err != nil {
return storiface.WorkerInfo{}, xerrors.Errorf("getting memory info: %w", err)
}
memSwap := mem.VirtualTotal
if l.noSwap {
memSwap = 0
}
return storiface.WorkerInfo{
Hostname: hostname,
IgnoreResources: l.ignoreResources,
Resources: storiface.WorkerResources{
MemPhysical: mem.Total,
MemPhysical: memPhysical,
MemSwap: memSwap,
MemReserved: mem.VirtualUsed + mem.Total - mem.Available, // TODO: sub this process
MemReserved: memReserved,
CPUs: uint64(runtime.NumCPU()),
GPUs: gpus,
},

1
go.mod
View File

@ -15,6 +15,7 @@ require (
github.com/buger/goterm v1.0.3
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
github.com/cockroachdb/pebble v0.0.0-20201001221639-879f3bfeef07
github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327
github.com/coreos/go-systemd/v22 v22.3.2
github.com/detailyang/go-fallocate v0.0.0-20180908115635-432fa640bd2e
github.com/dgraph-io/badger/v2 v2.2007.2

1
go.sum
View File

@ -174,6 +174,7 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5O
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.2.0 h1:Fv93L3KKckEcEHR3oApXVzyBTDA8WAm6VXhPE00N3f8=
github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=