mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-31 21:12:24 -05:00
[chore]: Bump github.com/KimMachineGun/automemlimit from 0.2.4 to 0.2.5 (#1666)
Bumps [github.com/KimMachineGun/automemlimit](https://github.com/KimMachineGun/automemlimit) from 0.2.4 to 0.2.5. - [Release notes](https://github.com/KimMachineGun/automemlimit/releases) - [Commits](https://github.com/KimMachineGun/automemlimit/compare/v0.2.4...v0.2.5) --- updated-dependencies: - dependency-name: github.com/KimMachineGun/automemlimit dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
parent
3f9b2336c0
commit
57dc742c76
200 changed files with 16392 additions and 38190 deletions
990
vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go
generated
vendored
Normal file
990
vendor/github.com/containerd/cgroups/v3/cgroup2/manager.go
generated
vendored
Normal file
|
|
@ -0,0 +1,990 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroup2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups/v3/cgroup2/stats"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
subtreeControl = "cgroup.subtree_control"
|
||||
controllersFile = "cgroup.controllers"
|
||||
killFile = "cgroup.kill"
|
||||
defaultCgroup2Path = "/sys/fs/cgroup"
|
||||
defaultSlice = "system.slice"
|
||||
)
|
||||
|
||||
var (
|
||||
canDelegate bool
|
||||
)
|
||||
|
||||
type Event struct {
|
||||
Low uint64
|
||||
High uint64
|
||||
Max uint64
|
||||
OOM uint64
|
||||
OOMKill uint64
|
||||
}
|
||||
|
||||
// Resources for a cgroups v2 unified hierarchy
|
||||
type Resources struct {
|
||||
CPU *CPU
|
||||
Memory *Memory
|
||||
Pids *Pids
|
||||
IO *IO
|
||||
RDMA *RDMA
|
||||
HugeTlb *HugeTlb
|
||||
// When len(Devices) is zero, devices are not controlled
|
||||
Devices []specs.LinuxDeviceCgroup
|
||||
}
|
||||
|
||||
// Values returns the raw filenames and values that
|
||||
// can be written to the unified hierarchy
|
||||
func (r *Resources) Values() (o []Value) {
|
||||
if r.CPU != nil {
|
||||
o = append(o, r.CPU.Values()...)
|
||||
}
|
||||
if r.Memory != nil {
|
||||
o = append(o, r.Memory.Values()...)
|
||||
}
|
||||
if r.Pids != nil {
|
||||
o = append(o, r.Pids.Values()...)
|
||||
}
|
||||
if r.IO != nil {
|
||||
o = append(o, r.IO.Values()...)
|
||||
}
|
||||
if r.RDMA != nil {
|
||||
o = append(o, r.RDMA.Values()...)
|
||||
}
|
||||
if r.HugeTlb != nil {
|
||||
o = append(o, r.HugeTlb.Values()...)
|
||||
}
|
||||
return o
|
||||
}
|
||||
|
||||
// EnabledControllers returns the list of all not nil resource controllers
|
||||
func (r *Resources) EnabledControllers() (c []string) {
|
||||
if r.CPU != nil {
|
||||
c = append(c, "cpu")
|
||||
c = append(c, "cpuset")
|
||||
}
|
||||
if r.Memory != nil {
|
||||
c = append(c, "memory")
|
||||
}
|
||||
if r.Pids != nil {
|
||||
c = append(c, "pids")
|
||||
}
|
||||
if r.IO != nil {
|
||||
c = append(c, "io")
|
||||
}
|
||||
if r.RDMA != nil {
|
||||
c = append(c, "rdma")
|
||||
}
|
||||
if r.HugeTlb != nil {
|
||||
c = append(c, "hugetlb")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Value of a cgroup setting
|
||||
type Value struct {
|
||||
filename string
|
||||
value interface{}
|
||||
}
|
||||
|
||||
// write the value to the full, absolute path, of a unified hierarchy
|
||||
func (c *Value) write(path string, perm os.FileMode) error {
|
||||
var data []byte
|
||||
switch t := c.value.(type) {
|
||||
case uint64:
|
||||
data = []byte(strconv.FormatUint(t, 10))
|
||||
case uint16:
|
||||
data = []byte(strconv.FormatUint(uint64(t), 10))
|
||||
case int64:
|
||||
data = []byte(strconv.FormatInt(t, 10))
|
||||
case []byte:
|
||||
data = t
|
||||
case string:
|
||||
data = []byte(t)
|
||||
case CPUMax:
|
||||
data = []byte(t)
|
||||
default:
|
||||
return ErrInvalidFormat
|
||||
}
|
||||
|
||||
return os.WriteFile(
|
||||
filepath.Join(path, c.filename),
|
||||
data,
|
||||
perm,
|
||||
)
|
||||
}
|
||||
|
||||
func writeValues(path string, values []Value) error {
|
||||
for _, o := range values {
|
||||
if err := o.write(path, defaultFilePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
|
||||
if resources == nil {
|
||||
return nil, errors.New("resources reference is nil")
|
||||
}
|
||||
if err := VerifyGroupPath(group); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := filepath.Join(mountpoint, group)
|
||||
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Manager{
|
||||
unifiedMountpoint: mountpoint,
|
||||
path: path,
|
||||
}
|
||||
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
if err := setResources(path, resources); err != nil {
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
type InitConfig struct {
|
||||
mountpoint string
|
||||
}
|
||||
|
||||
type InitOpts func(c *InitConfig) error
|
||||
|
||||
// WithMountpoint sets the unified mountpoint. The default path is /sys/fs/cgroup.
|
||||
func WithMountpoint(path string) InitOpts {
|
||||
return func(c *InitConfig) error {
|
||||
c.mountpoint = path
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Load a cgroup.
|
||||
func Load(group string, opts ...InitOpts) (*Manager, error) {
|
||||
c := InitConfig{mountpoint: defaultCgroup2Path}
|
||||
for _, opt := range opts {
|
||||
if err := opt(&c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := VerifyGroupPath(group); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := filepath.Join(c.mountpoint, group)
|
||||
return &Manager{
|
||||
unifiedMountpoint: c.mountpoint,
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
unifiedMountpoint string
|
||||
path string
|
||||
}
|
||||
|
||||
func setResources(path string, resources *Resources) error {
|
||||
if resources != nil {
|
||||
if err := writeValues(path, resources.Values()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setDevices(path, resources.Devices); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Manager) RootControllers() ([]string, error) {
|
||||
b, err := os.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return strings.Fields(string(b)), nil
|
||||
}
|
||||
|
||||
func (c *Manager) Controllers() ([]string, error) {
|
||||
b, err := os.ReadFile(filepath.Join(c.path, controllersFile))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return strings.Fields(string(b)), nil
|
||||
}
|
||||
|
||||
func (c *Manager) Update(resources *Resources) error {
|
||||
return setResources(c.path, resources)
|
||||
}
|
||||
|
||||
type ControllerToggle int
|
||||
|
||||
const (
|
||||
Enable ControllerToggle = iota + 1
|
||||
Disable
|
||||
)
|
||||
|
||||
func toggleFunc(controllers []string, prefix string) []string {
|
||||
out := make([]string, len(controllers))
|
||||
for i, c := range controllers {
|
||||
out[i] = prefix + c
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error {
|
||||
// when c.path is like /foo/bar/baz, the following files need to be written:
|
||||
// * /sys/fs/cgroup/cgroup.subtree_control
|
||||
// * /sys/fs/cgroup/foo/cgroup.subtree_control
|
||||
// * /sys/fs/cgroup/foo/bar/cgroup.subtree_control
|
||||
// Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written.
|
||||
split := strings.Split(c.path, "/")
|
||||
var lastErr error
|
||||
for i := range split {
|
||||
f := strings.Join(split[:i], "/")
|
||||
if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path {
|
||||
continue
|
||||
}
|
||||
filePath := filepath.Join(f, subtreeControl)
|
||||
if err := c.writeSubtreeControl(filePath, controllers, t); err != nil {
|
||||
// When running as rootless, the user may face EPERM on parent groups, but it is neglible when the
|
||||
// controller is already written.
|
||||
// So we only return the last error.
|
||||
lastErr = fmt.Errorf("failed to write subtree controllers %+v to %q: %w", controllers, filePath, err)
|
||||
} else {
|
||||
lastErr = nil
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error {
|
||||
f, err := os.OpenFile(filePath, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
switch t {
|
||||
case Enable:
|
||||
controllers = toggleFunc(controllers, "+")
|
||||
case Disable:
|
||||
controllers = toggleFunc(controllers, "-")
|
||||
}
|
||||
_, err = f.WriteString(strings.Join(controllers, " "))
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) {
|
||||
if strings.HasPrefix(name, "/") {
|
||||
return nil, errors.New("name must be relative")
|
||||
}
|
||||
path := filepath.Join(c.path, name)
|
||||
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Manager{
|
||||
unifiedMountpoint: c.unifiedMountpoint,
|
||||
path: path,
|
||||
}
|
||||
if resources != nil {
|
||||
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if err := setResources(path, resources); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
func (c *Manager) AddProc(pid uint64) error {
|
||||
v := Value{
|
||||
filename: cgroupProcs,
|
||||
value: pid,
|
||||
}
|
||||
return writeValues(c.path, []Value{v})
|
||||
}
|
||||
|
||||
func (c *Manager) AddThread(tid uint64) error {
|
||||
v := Value{
|
||||
filename: cgroupThreads,
|
||||
value: tid,
|
||||
}
|
||||
return writeValues(c.path, []Value{v})
|
||||
}
|
||||
|
||||
// Kill will try to forcibly exit all of the processes in the cgroup. This is
|
||||
// equivalent to sending a SIGKILL to every process. On kernels 5.14 and greater
|
||||
// this will use the cgroup.kill file, on anything that doesn't have the cgroup.kill
|
||||
// file, a manual process of freezing -> sending a SIGKILL to every process -> thawing
|
||||
// will be used.
|
||||
func (c *Manager) Kill() error {
|
||||
v := Value{
|
||||
filename: killFile,
|
||||
value: "1",
|
||||
}
|
||||
err := writeValues(c.path, []Value{v})
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
logrus.Warnf("falling back to slower kill implementation: %s", err)
|
||||
// Fallback to slow method.
|
||||
return c.fallbackKill()
|
||||
}
|
||||
|
||||
// fallbackKill is a slower fallback to the more modern (kernels 5.14+)
|
||||
// approach of writing to the cgroup.kill file. This is heavily pulled
|
||||
// from runc's same approach (in signalAllProcesses), with the only differences
|
||||
// being this is just tailored to the API exposed in this library, and we don't
|
||||
// need to care about signals other than SIGKILL.
|
||||
//
|
||||
// https://github.com/opencontainers/runc/blob/8da0a0b5675764feaaaaad466f6567a9983fcd08/libcontainer/init_linux.go#L523-L529
|
||||
func (c *Manager) fallbackKill() error {
|
||||
if err := c.Freeze(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
pids, err := c.Procs(true)
|
||||
if err != nil {
|
||||
if err := c.Thaw(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
var procs []*os.Process
|
||||
for _, pid := range pids {
|
||||
p, err := os.FindProcess(int(pid))
|
||||
if err != nil {
|
||||
logrus.Warn(err)
|
||||
continue
|
||||
}
|
||||
procs = append(procs, p)
|
||||
if err := p.Signal(unix.SIGKILL); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
if err := c.Thaw(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
subreaper, err := getSubreaper()
|
||||
if err != nil {
|
||||
// The error here means that PR_GET_CHILD_SUBREAPER is not
|
||||
// supported because this code might run on a kernel older
|
||||
// than 3.4. We don't want to throw an error in that case,
|
||||
// and we simplify things, considering there is no subreaper
|
||||
// set.
|
||||
subreaper = 0
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
// In case a subreaper has been setup, this code must not
|
||||
// wait for the process. Otherwise, we cannot be sure the
|
||||
// current process will be reaped by the subreaper, while
|
||||
// the subreaper might be waiting for this process in order
|
||||
// to retrieve its exit code.
|
||||
if subreaper == 0 {
|
||||
if _, err := p.Wait(); err != nil {
|
||||
if !errors.Is(err, unix.ECHILD) {
|
||||
logrus.Warnf("wait on pid %d failed: %s", p.Pid, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Manager) Delete() error {
|
||||
// kernel prevents cgroups with running process from being removed, check the tree is empty
|
||||
processes, err := c.Procs(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(processes) > 0 {
|
||||
return fmt.Errorf("cgroups: unable to remove path %q: still contains running processes", c.path)
|
||||
}
|
||||
return remove(c.path)
|
||||
}
|
||||
|
||||
func (c *Manager) Procs(recursive bool) ([]uint64, error) {
|
||||
var processes []uint64
|
||||
err := filepath.Walk(c.path, func(p string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !recursive && info.IsDir() {
|
||||
if p == c.path {
|
||||
return nil
|
||||
}
|
||||
return filepath.SkipDir
|
||||
}
|
||||
_, name := filepath.Split(p)
|
||||
if name != cgroupProcs {
|
||||
return nil
|
||||
}
|
||||
procs, err := parseCgroupProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
processes = append(processes, procs...)
|
||||
return nil
|
||||
})
|
||||
return processes, err
|
||||
}
|
||||
|
||||
func (c *Manager) MoveTo(destination *Manager) error {
|
||||
processes, err := c.Procs(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, p := range processes {
|
||||
if err := destination.AddProc(p); err != nil {
|
||||
if strings.Contains(err.Error(), "no such process") {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var singleValueFiles = []string{
|
||||
"pids.current",
|
||||
"pids.max",
|
||||
}
|
||||
|
||||
func (c *Manager) Stat() (*stats.Metrics, error) {
|
||||
controllers, err := c.Controllers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := make(map[string]interface{})
|
||||
for _, controller := range controllers {
|
||||
switch controller {
|
||||
case "cpu", "memory":
|
||||
if err := readKVStatsFile(c.path, controller+".stat", out); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, name := range singleValueFiles {
|
||||
if err := readSingleFile(c.path, name, out); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
memoryEvents := make(map[string]interface{})
|
||||
if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
var metrics stats.Metrics
|
||||
|
||||
metrics.Pids = &stats.PidsStat{
|
||||
Current: getPidValue("pids.current", out),
|
||||
Limit: getPidValue("pids.max", out),
|
||||
}
|
||||
metrics.CPU = &stats.CPUStat{
|
||||
UsageUsec: getUint64Value("usage_usec", out),
|
||||
UserUsec: getUint64Value("user_usec", out),
|
||||
SystemUsec: getUint64Value("system_usec", out),
|
||||
NrPeriods: getUint64Value("nr_periods", out),
|
||||
NrThrottled: getUint64Value("nr_throttled", out),
|
||||
ThrottledUsec: getUint64Value("throttled_usec", out),
|
||||
}
|
||||
metrics.Memory = &stats.MemoryStat{
|
||||
Anon: getUint64Value("anon", out),
|
||||
File: getUint64Value("file", out),
|
||||
KernelStack: getUint64Value("kernel_stack", out),
|
||||
Slab: getUint64Value("slab", out),
|
||||
Sock: getUint64Value("sock", out),
|
||||
Shmem: getUint64Value("shmem", out),
|
||||
FileMapped: getUint64Value("file_mapped", out),
|
||||
FileDirty: getUint64Value("file_dirty", out),
|
||||
FileWriteback: getUint64Value("file_writeback", out),
|
||||
AnonThp: getUint64Value("anon_thp", out),
|
||||
InactiveAnon: getUint64Value("inactive_anon", out),
|
||||
ActiveAnon: getUint64Value("active_anon", out),
|
||||
InactiveFile: getUint64Value("inactive_file", out),
|
||||
ActiveFile: getUint64Value("active_file", out),
|
||||
Unevictable: getUint64Value("unevictable", out),
|
||||
SlabReclaimable: getUint64Value("slab_reclaimable", out),
|
||||
SlabUnreclaimable: getUint64Value("slab_unreclaimable", out),
|
||||
Pgfault: getUint64Value("pgfault", out),
|
||||
Pgmajfault: getUint64Value("pgmajfault", out),
|
||||
WorkingsetRefault: getUint64Value("workingset_refault", out),
|
||||
WorkingsetActivate: getUint64Value("workingset_activate", out),
|
||||
WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out),
|
||||
Pgrefill: getUint64Value("pgrefill", out),
|
||||
Pgscan: getUint64Value("pgscan", out),
|
||||
Pgsteal: getUint64Value("pgsteal", out),
|
||||
Pgactivate: getUint64Value("pgactivate", out),
|
||||
Pgdeactivate: getUint64Value("pgdeactivate", out),
|
||||
Pglazyfree: getUint64Value("pglazyfree", out),
|
||||
Pglazyfreed: getUint64Value("pglazyfreed", out),
|
||||
ThpFaultAlloc: getUint64Value("thp_fault_alloc", out),
|
||||
ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out),
|
||||
Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")),
|
||||
UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")),
|
||||
SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
|
||||
SwapLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")),
|
||||
}
|
||||
if len(memoryEvents) > 0 {
|
||||
metrics.MemoryEvents = &stats.MemoryEvents{
|
||||
Low: getUint64Value("low", memoryEvents),
|
||||
High: getUint64Value("high", memoryEvents),
|
||||
Max: getUint64Value("max", memoryEvents),
|
||||
Oom: getUint64Value("oom", memoryEvents),
|
||||
OomKill: getUint64Value("oom_kill", memoryEvents),
|
||||
}
|
||||
}
|
||||
metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
|
||||
metrics.Rdma = &stats.RdmaStat{
|
||||
Current: rdmaStats(filepath.Join(c.path, "rdma.current")),
|
||||
Limit: rdmaStats(filepath.Join(c.path, "rdma.max")),
|
||||
}
|
||||
metrics.Hugetlb = readHugeTlbStats(c.path)
|
||||
|
||||
return &metrics, nil
|
||||
}
|
||||
|
||||
func getUint64Value(key string, out map[string]interface{}) uint64 {
|
||||
v, ok := out[key]
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
switch t := v.(type) {
|
||||
case uint64:
|
||||
return t
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func getPidValue(key string, out map[string]interface{}) uint64 {
|
||||
v, ok := out[key]
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
switch t := v.(type) {
|
||||
case uint64:
|
||||
return t
|
||||
case string:
|
||||
if t == "max" {
|
||||
return math.MaxUint64
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func readSingleFile(path string, file string, out map[string]interface{}) error {
|
||||
f, err := os.Open(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s := strings.TrimSpace(string(data))
|
||||
v, err := parseUint(s, 10, 64)
|
||||
if err != nil {
|
||||
// if we cannot parse as a uint, parse as a string
|
||||
out[file] = s
|
||||
return nil
|
||||
}
|
||||
out[file] = v
|
||||
return nil
|
||||
}
|
||||
|
||||
func readKVStatsFile(path string, file string, out map[string]interface{}) error {
|
||||
f, err := os.Open(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
name, value, err := parseKV(s.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while parsing %s (line=%q): %w", filepath.Join(path, file), s.Text(), err)
|
||||
}
|
||||
out[name] = value
|
||||
}
|
||||
return s.Err()
|
||||
}
|
||||
|
||||
func (c *Manager) Freeze() error {
|
||||
return c.freeze(c.path, Frozen)
|
||||
}
|
||||
|
||||
func (c *Manager) Thaw() error {
|
||||
return c.freeze(c.path, Thawed)
|
||||
}
|
||||
|
||||
func (c *Manager) freeze(path string, state State) error {
|
||||
values := state.Values()
|
||||
for {
|
||||
if err := writeValues(path, values); err != nil {
|
||||
return err
|
||||
}
|
||||
current, err := fetchState(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if current == state {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Manager) isCgroupEmpty() bool {
|
||||
// In case of any error we return true so that we exit and don't leak resources
|
||||
out := make(map[string]interface{})
|
||||
if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil {
|
||||
return true
|
||||
}
|
||||
if v, ok := out["populated"]; ok {
|
||||
populated, ok := v.(uint64)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
return populated == 0
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
|
||||
func (c *Manager) MemoryEventFD() (int, uint32, error) {
|
||||
fpath := filepath.Join(c.path, "memory.events")
|
||||
fd, err := syscall.InotifyInit()
|
||||
if err != nil {
|
||||
return 0, 0, errors.New("failed to create inotify fd")
|
||||
}
|
||||
wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
syscall.Close(fd)
|
||||
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err)
|
||||
}
|
||||
// monitor to detect process exit/cgroup deletion
|
||||
evpath := filepath.Join(c.path, "cgroup.events")
|
||||
if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil {
|
||||
syscall.Close(fd)
|
||||
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err)
|
||||
}
|
||||
|
||||
return fd, uint32(wd), nil
|
||||
}
|
||||
|
||||
func (c *Manager) EventChan() (<-chan Event, <-chan error) {
|
||||
ec := make(chan Event)
|
||||
errCh := make(chan error, 1)
|
||||
go c.waitForEvents(ec, errCh)
|
||||
|
||||
return ec, errCh
|
||||
}
|
||||
|
||||
func parseMemoryEvents(out map[string]interface{}) (Event, error) {
|
||||
e := Event{}
|
||||
if v, ok := out["high"]; ok {
|
||||
e.High, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert high to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["low"]; ok {
|
||||
e.Low, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["max"]; ok {
|
||||
e.Max, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["oom"]; ok {
|
||||
e.OOM, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["oom_kill"]; ok {
|
||||
e.OOMKill, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
|
||||
defer close(errCh)
|
||||
|
||||
fd, _, err := c.MemoryEventFD()
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
defer syscall.Close(fd)
|
||||
|
||||
for {
|
||||
buffer := make([]byte, syscall.SizeofInotifyEvent*10)
|
||||
bytesRead, err := syscall.Read(fd, buffer)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
if bytesRead >= syscall.SizeofInotifyEvent {
|
||||
out := make(map[string]interface{})
|
||||
if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
|
||||
// When cgroup is deleted read may return -ENODEV instead of -ENOENT from open.
|
||||
if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) {
|
||||
errCh <- err
|
||||
}
|
||||
return
|
||||
}
|
||||
e, err := parseMemoryEvents(out)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
ec <- e
|
||||
if c.isCgroupEmpty() {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
insts, license, err := DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot get dir FD for %s", path)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(devices) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getSystemdFullPath returns the full systemd path when creating a systemd slice group.
|
||||
// the reason this is necessary is because the "-" character has a special meaning in
|
||||
// systemd slice. For example, when creating a slice called "my-group-112233.slice",
|
||||
// systemd will create a hierarchy like this:
|
||||
//
|
||||
// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice
|
||||
func getSystemdFullPath(slice, group string) string {
|
||||
return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group))
|
||||
}
|
||||
|
||||
// dashesToPath converts a slice name with dashes to it's corresponding systemd filesystem path.
|
||||
func dashesToPath(in string) string {
|
||||
path := ""
|
||||
if strings.HasSuffix(in, ".slice") && strings.Contains(in, "-") {
|
||||
parts := strings.Split(in, "-")
|
||||
for i := range parts {
|
||||
s := strings.Join(parts[0:i+1], "-")
|
||||
if !strings.HasSuffix(s, ".slice") {
|
||||
s += ".slice"
|
||||
}
|
||||
path = filepath.Join(path, s)
|
||||
}
|
||||
} else {
|
||||
path = filepath.Join(path, in)
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) {
|
||||
if slice == "" {
|
||||
slice = defaultSlice
|
||||
}
|
||||
ctx := context.TODO()
|
||||
path := getSystemdFullPath(slice, group)
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
return &Manager{}, err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
properties := []systemdDbus.Property{
|
||||
systemdDbus.PropDescription("cgroup " + group),
|
||||
newSystemdProperty("DefaultDependencies", false),
|
||||
newSystemdProperty("MemoryAccounting", true),
|
||||
newSystemdProperty("CPUAccounting", true),
|
||||
newSystemdProperty("IOAccounting", true),
|
||||
}
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(group, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(defaultSlice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(defaultSlice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
if resources.Memory != nil && resources.Memory.Min != nil && *resources.Memory.Min != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("MemoryMin", uint64(*resources.Memory.Min)))
|
||||
}
|
||||
|
||||
if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max)))
|
||||
}
|
||||
|
||||
if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("CPUWeight", *resources.CPU.Weight))
|
||||
}
|
||||
|
||||
if resources.CPU != nil && resources.CPU.Max != "" {
|
||||
quota, period := resources.CPU.Max.extractQuotaAndPeriod()
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
// If we can delegate, we add the property back in
|
||||
if canDelegate {
|
||||
properties = append(properties, newSystemdProperty("Delegate", true))
|
||||
}
|
||||
|
||||
if resources.Pids != nil && resources.Pids.Max > 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("TasksAccounting", true),
|
||||
newSystemdProperty("TasksMax", uint64(resources.Pids.Max)))
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := conn.StartTransientUnitContext(ctx, group, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return &Manager{}, err
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func LoadSystemd(slice, group string) (*Manager, error) {
|
||||
if slice == "" {
|
||||
slice = defaultSlice
|
||||
}
|
||||
path := getSystemdFullPath(slice, group)
|
||||
return &Manager{
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Manager) DeleteSystemd() error {
|
||||
ctx := context.TODO()
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
group := systemdUnitFromPath(c.path)
|
||||
ch := make(chan string)
|
||||
_, err = conn.StopUnitContext(ctx, group, "replace", ch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
<-ch
|
||||
return nil
|
||||
}
|
||||
|
||||
func newSystemdProperty(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue