diff --git a/extern/sector-storage/cgroups.go b/extern/sector-storage/cgroups.go
new file mode 100644
index 000000000..e2ec0564e
--- /dev/null
+++ b/extern/sector-storage/cgroups.go
@@ -0,0 +1,16 @@
+//go:build !linux
+// +build !linux
+
+package sectorstorage
+
+// cgroupV1Mem is the non-Linux stub. It reports every value as zero, which
+// memInfo treats as "no cgroup limit".
+func cgroupV1Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
+	return 0, 0, 0, 0, nil
+}
+
+// cgroupV2Mem is the non-Linux stub. It reports every value as zero, which
+// memInfo treats as "no cgroup limit".
+func cgroupV2Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
+	return 0, 0, 0, 0, nil
+}
diff --git a/extern/sector-storage/cgroups_linux.go b/extern/sector-storage/cgroups_linux.go
new file mode 100644
index 000000000..0b6efea99
--- /dev/null
+++ b/extern/sector-storage/cgroups_linux.go
@@ -0,0 +1,144 @@
+//go:build linux
+// +build linux
+
+package sectorstorage
+
+import (
+	"bufio"
+	"bytes"
+	"math"
+	"os"
+	"path/filepath"
+
+	"github.com/containerd/cgroups"
+	cgroupv2 "github.com/containerd/cgroups/v2"
+)
+
+// cgroupV2MountPoint returns the mount point of the first cgroup2 filesystem
+// listed in /proc/self/mountinfo, or cgroups.ErrMountPointNotExist when none
+// is mounted.
+func cgroupV2MountPoint() (string, error) {
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		fields := bytes.Fields(scanner.Bytes())
+		// After the six fixed fields come zero or more optional fields, a "-"
+		// separator and then the filesystem type, so the type must be found
+		// relative to the separator instead of at a fixed index.
+		for i := 6; i+1 < len(fields); i++ {
+			if string(fields[i]) != "-" {
+				continue
+			}
+			if string(fields[i+1]) == "cgroup2" {
+				return string(fields[4]), nil
+			}
+			break
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return "", err
+	}
+	return "", cgroups.ErrMountPointNotExist
+}
+
+// cgroupV1Mem loads the cgroup v1 memory subsystem and returns its memory and
+// swap limit and usage. All values are zero when no memory statistics are
+// reported.
+func cgroupV1Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
+	path := cgroups.NestedPath("")
+	if pid := os.Getpid(); pid == 1 {
+		path = cgroups.RootPath
+	}
+	c, err := cgroups.Load(cgroups.SingleSubsystem(cgroups.V1, cgroups.Memory), path)
+	if err != nil {
+		return 0, 0, 0, 0, err
+	}
+	stats, err := c.Stat()
+	if err != nil {
+		return 0, 0, 0, 0, err
+	}
+	if stats.Memory == nil {
+		return 0, 0, 0, 0, nil
+	}
+	if stats.Memory.Usage != nil {
+		memoryMax = stats.Memory.Usage.Limit
+		// Exclude cached files, clamping at zero so the unsigned subtraction
+		// cannot wrap if the counters disagree.
+		memoryUsed = subClamped(stats.Memory.Usage.Usage, stats.Memory.InactiveFile+stats.Memory.ActiveFile)
+	}
+	if stats.Memory.Swap != nil {
+		swapMax = stats.Memory.Swap.Limit
+		swapUsed = stats.Memory.Swap.Usage
+	}
+	return memoryMax, memoryUsed, swapMax, swapUsed, nil
+}
+
+// cgroupV2MemFromPath returns the memory and swap limit and usage reported by
+// the cgroup v2 group at path under mount point mp.
+func cgroupV2MemFromPath(mp, path string) (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
+	c, err := cgroupv2.LoadManager(mp, path)
+	if err != nil {
+		return 0, 0, 0, 0, err
+	}
+
+	stats, err := c.Stat()
+	if err != nil {
+		return 0, 0, 0, 0, err
+	}
+
+	if stats.Memory != nil {
+		memoryMax = stats.Memory.UsageLimit
+		// Exclude memory used caching files, clamping at zero so the unsigned
+		// subtraction cannot wrap if the counters disagree.
+		memoryUsed = subClamped(stats.Memory.Usage, stats.Memory.File)
+		swapMax = stats.Memory.SwapLimit
+		swapUsed = stats.Memory.SwapUsage
+	}
+
+	return memoryMax, memoryUsed, swapMax, swapUsed, nil
+}
+
+// cgroupV2Mem walks from this process's cgroup v2 group up towards the root
+// and returns the lowest memory and swap limits found, each with the usage of
+// the group that sets it. A limit that no group sets stays math.MaxUint64.
+func cgroupV2Mem() (memoryMax, memoryUsed, swapMax, swapUsed uint64, err error) {
+	memoryMax = math.MaxUint64
+	swapMax = math.MaxUint64
+
+	path, err := cgroupv2.PidGroupPath(os.Getpid())
+	if err != nil {
+		return 0, 0, 0, 0, err
+	}
+
+	mp, err := cgroupV2MountPoint()
+	if err != nil {
+		return 0, 0, 0, 0, err
+	}
+
+	// filepath.Dir settles on "/" or ".", so stop on either; testing only for
+	// "/" would loop forever on a relative or empty group path.
+	for path != "/" && path != "." {
+		cgMemoryMax, cgMemoryUsed, cgSwapMax, cgSwapUsed, err := cgroupV2MemFromPath(mp, path)
+		if err != nil {
+			return 0, 0, 0, 0, err
+		}
+		if cgMemoryMax != 0 && cgMemoryMax < memoryMax {
+			log.Debugf("memory limited by cgroup %s: %v", path, cgMemoryMax)
+			memoryMax = cgMemoryMax
+			memoryUsed = cgMemoryUsed
+		}
+		if cgSwapMax != 0 && cgSwapMax < swapMax {
+			log.Debugf("swap limited by cgroup %s: %v", path, cgSwapMax)
+			swapMax = cgSwapMax
+			swapUsed = cgSwapUsed
+		}
+		path = filepath.Dir(path)
+	}
+
+	return memoryMax, memoryUsed, swapMax, swapUsed, nil
+}
diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go
index d45d140f8..c8a3d0e7c 100644
--- a/extern/sector-storage/worker_local.go
+++ b/extern/sector-storage/worker_local.go
@@ -482,6 +482,66 @@ func (l *LocalWorker) Paths(ctx context.Context) ([]stores.StoragePath, error) {
 	return l.localStore.Local(ctx)
 }
 
+// subClamped returns a - b, or 0 when b exceeds a, so that counters sampled
+// at different moments cannot make an unsigned subtraction wrap around.
+func subClamped(a, b uint64) uint64 {
+	if b > a {
+		return 0
+	}
+	return a - b
+}
+
+// memInfo returns the physical memory total, the swap total and the amount
+// of memory plus swap currently in use. Host figures are narrowed to cgroup
+// v1/v2 limits when those are lower, and swap is reported as zero when
+// l.noSwap is set.
+func (l *LocalWorker) memInfo() (memPhysical uint64, memVirtual uint64, memReserved uint64, err error) {
+	h, err := sysinfo.Host()
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	mem, err := h.Memory()
+	if err != nil {
+		return 0, 0, 0, err
+	}
+	memPhysical = mem.Total
+	// Use Available rather than Free, matching the MemReserved formula this
+	// helper replaces in Info (VirtualUsed + Total - Available).
+	memAvail := mem.Available
+	memSwap := mem.VirtualTotal
+	swapAvail := mem.VirtualFree
+
+	// applyCgroup narrows the host figures to one cgroup hierarchy's limits.
+	// A lookup error leaves the figures untouched, so a missing cgroup
+	// version does not fail the whole call.
+	applyCgroup := func(cgMemMax, cgMemUsed, cgSwapMax, cgSwapUsed uint64, err error) {
+		if err != nil {
+			return
+		}
+		if cgMemMax > 0 && cgMemMax < memPhysical {
+			memPhysical = cgMemMax
+			memAvail = subClamped(cgMemMax, cgMemUsed)
+		}
+		if cgSwapMax > 0 && cgSwapMax < memSwap {
+			memSwap = cgSwapMax
+			swapAvail = subClamped(cgSwapMax, cgSwapUsed)
+		}
+	}
+	applyCgroup(cgroupV1Mem())
+	applyCgroup(cgroupV2Mem())
+
+	if l.noSwap {
+		memSwap = 0
+		swapAvail = 0
+	}
+
+	// Clamped so inconsistent samples cannot wrap the unsigned result.
+	memReserved = subClamped(memPhysical+memSwap, memAvail+swapAvail)
+
+	return memPhysical, memSwap, memReserved, nil
+}
+
 func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
 	hostname, err := os.Hostname() // TODO: allow overriding from config
 	if err != nil {
@@ -493,28 +553,18 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) {
 		log.Errorf("getting gpu devices failed: %+v", err)
 	}
 
-	h, err := sysinfo.Host()
-	if err != nil {
-		return storiface.WorkerInfo{}, xerrors.Errorf("getting host info: %w", err)
-	}
-
-	mem, err := h.Memory()
+	memPhysical, memSwap, memReserved, err := l.memInfo()
 	if err != nil {
 		return storiface.WorkerInfo{}, xerrors.Errorf("getting memory info: %w", err)
 	}
 
-	memSwap := mem.VirtualTotal
-	if l.noSwap {
-		memSwap = 0
-	}
-
 	return storiface.WorkerInfo{
 		Hostname:        hostname,
 		IgnoreResources: l.ignoreResources,
 		Resources: storiface.WorkerResources{
-			MemPhysical: mem.Total,
+			MemPhysical: memPhysical,
 			MemSwap:     memSwap,
-			MemReserved: mem.VirtualUsed + mem.Total - mem.Available, // TODO: sub this process
+			MemReserved: memReserved,
 			CPUs:        uint64(runtime.NumCPU()),
 			GPUs:        gpus,
 		},
diff --git a/go.mod b/go.mod
index 3866fb7de..ba29f8cdf 100644
--- a/go.mod
+++ b/go.mod
@@ -15,6 +15,7 @@ require (
 	github.com/buger/goterm v1.0.3
 	github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
 	github.com/cockroachdb/pebble v0.0.0-20201001221639-879f3bfeef07
+	github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327
 	github.com/coreos/go-systemd/v22 v22.3.2
 	github.com/detailyang/go-fallocate v0.0.0-20180908115635-432fa640bd2e
 	github.com/dgraph-io/badger/v2 v2.2007.2
diff --git a/go.sum b/go.sum
index 03d858d27..c0ceb01ae 100644
--- a/go.sum
+++ b/go.sum
@@ -174,6 +174,7 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5O
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/cilium/ebpf v0.2.0 h1:Fv93L3KKckEcEHR3oApXVzyBTDA8WAm6VXhPE00N3f8=
 github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
 github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
 github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=