From 8e66f7053c583edbd87940bcef09b764a6f17567 Mon Sep 17 00:00:00 2001 From: aram price Date: Fri, 28 Jun 2024 15:09:01 -0700 Subject: [PATCH] Experimental support for cgroups v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently two tests in the integration specs are failing. It is unclear whether this is the fault of my Docker setup or whether it represents an actual issue with how `runc` is being set up. Failures: ``` ------------------------------ • [FAILED] [0.167 seconds] resource limits memory [It] gets OOMed when it exceeds its memory limit /bpm/src/bpm/integration/resource_limits_test.go:116 Timeline >> If this test fails, then make sure you have enabled swap accounting! Details are in the README. Error: failed to start job-process: exit status 1 [FAILED] in [It] - /bpm/src/bpm/integration/resource_limits_test.go:122 @ 06/28/24 22:07:30.852 BEGIN '/bpmtmp/resource-limits-test1115196611/sys/log/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2.stderr.log' time="2024-06-28T22:07:30Z" level=warning msg="unable to get oom kill count" error="openat2 /sys/fs/cgroup/bpm-0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2/memory.events: no such file or directory" time="2024-06-28T22:07:30Z" level=error msg="runc run failed: unable to start container process: unable to apply cgroup configuration: cannot enter cgroupv2 \"/sys/fs/cgroup/bpm-0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2\" with domain controllers -- it is in an invalid state" END '/bpmtmp/resource-limits-test1115196611/sys/log/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2.stderr.log' BEGIN '/bpmtmp/resource-limits-test1115196611/sys/log/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2.stdout.log' END '/bpmtmp/resource-limits-test1115196611/sys/log/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2/0cf7c73a-0b65-4f46-90a7-eb0bb809e6c2.stdout.log' << Timeline [FAILED] Expected : 1 to match exit code: : 0 In [It] at: 
/bpm/src/bpm/integration/resource_limits_test.go:122 @ 06/28/24 22:07:30.852 ------------------------------ •••••••••••••••••••••••••• ------------------------------ • [FAILED] [0.364 seconds] start when a broken runc configuration is left on the system [It] `bpm start` cleans up the broken-ness and starts it /bpm/src/bpm/integration/start_test.go:329 Timeline >> Error: failed to start job-process: exit status 1 [FAILED] in [It] - /bpm/src/bpm/integration/start_test.go:337 @ 06/28/24 22:07:31.915 BEGIN '/bpmtmp/start-test2475062763/sys/log/e599a26c-5d89-421d-a740-04dd490c314b/e599a26c-5d89-421d-a740-04dd490c314b.stdout.log' en_US.UTF-8 Logging to STDOUT Received a TERM signal END '/bpmtmp/start-test2475062763/sys/log/e599a26c-5d89-421d-a740-04dd490c314b/e599a26c-5d89-421d-a740-04dd490c314b.stdout.log' BEGIN '/bpmtmp/start-test2475062763/sys/log/e599a26c-5d89-421d-a740-04dd490c314b/e599a26c-5d89-421d-a740-04dd490c314b.stderr.log' Logging to STDERR [WARN tini (1)] Reaped zombie process with pid=8 time="2024-06-28T22:07:31Z" level=error msg="runc run failed: unable to get cgroup PIDs: read /sys/fs/cgroup/bpm-e599a26c-5d89-421d-a740-04dd490c314b/cgroup.procs: operation not supported" END '/bpmtmp/start-test2475062763/sys/log/e599a26c-5d89-421d-a740-04dd490c314b/e599a26c-5d89-421d-a740-04dd490c314b.stderr.log' << Timeline [FAILED] Expected : 1 to match exit code: : 0 In [It] at: /bpm/src/bpm/integration/start_test.go:337 @ 06/28/24 22:07:31.915 ------------------------------ ••••••••••••••••••••••••••••• Summarizing 2 Failures: [FAIL] resource limits memory [It] gets OOMed when it exceeds its memory limit /bpm/src/bpm/integration/resource_limits_test.go:122 [FAIL] start when a broken runc configuration is left on the system [It] `bpm start` cleans up the broken-ness and starts it /bpm/src/bpm/integration/start_test.go:337 Ran 69 of 69 Specs in 27.622 seconds FAIL! 
-- 67 Passed | 2 Failed | 0 Pending | 0 Skipped ``` --- src/bpm/cgroups/cgroup.go | 4 ++++ src/bpm/sysfeat/sysfeat.go | 42 ++++++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/bpm/cgroups/cgroup.go b/src/bpm/cgroups/cgroup.go index 01bd6435..6a3e5513 100644 --- a/src/bpm/cgroups/cgroup.go +++ b/src/bpm/cgroups/cgroup.go @@ -32,6 +32,10 @@ import ( const cgroupRoot = "/sys/fs/cgroup" func Setup() error { + if cgroups.IsCgroup2UnifiedMode() { + return nil + } + mounts, err := mountinfo.GetMounts(mountinfo.ParentsFilter(cgroupRoot)) if err != nil { return fmt.Errorf("unable to retrieve mounts: %s", err) diff --git a/src/bpm/sysfeat/sysfeat.go b/src/bpm/sysfeat/sysfeat.go index 2ced385e..51a1eabe 100644 --- a/src/bpm/sysfeat/sysfeat.go +++ b/src/bpm/sysfeat/sysfeat.go @@ -25,7 +25,11 @@ import ( ) const ( - swapPath = "memory.memsw.limit_in_bytes" + swapPathCgroup1 = "memory.memsw.limit_in_bytes" + swapPathCgroup2 = "memory.swap.max" + + unifiedMountpoint = "/sys/fs/cgroup" + hybridMountpoint = "/sys/fs/cgroup/unified" ) // Features contains information about what features the host system supports. 
@@ -35,17 +39,43 @@ type Features struct { } func Fetch() (*Features, error) { - mountpoint, err := cgroups.FindCgroupMountpoint("", "memory") + supported, err := swapLimitSupported() if err != nil { return nil, err } return &Features{ - SwapLimitSupported: swapLimitSupported(mountpoint), + SwapLimitSupported: supported, }, nil } -func swapLimitSupported(mount string) bool { - _, err := os.Stat(filepath.Join(mount, swapPath)) - return err == nil +func swapLimitSupported() (bool, error) { + if cgroups.IsCgroup2UnifiedMode() { + return swapLimitSupportedCgroup2() + } + + return swapLimitSupportedCgroup1() +} + +func swapLimitSupportedCgroup2() (bool, error) { + mountpoint := unifiedMountpoint + if cgroups.IsCgroup2HybridMode() { + mountpoint = hybridMountpoint + } + + if cgroups.PathExists(filepath.Join(mountpoint, swapPathCgroup2)) { + return true, nil + } + + return false, nil +} + +func swapLimitSupportedCgroup1() (bool, error) { + mountPoint, err := cgroups.FindCgroupMountpoint("", "memory") + if err != nil { + return false, err + } + + _, err = os.Stat(filepath.Join(mountPoint, swapPathCgroup1)) + return err == nil, nil }