Skip to content

Commit 3c3d529

Browse files
authored
Merge pull request #357 from giuseppe/systemd_cgroup_v1
cgroup: support mounting name=systemd on cgroup v2
2 parents 63674ec + 5ddf0c4 commit 3c3d529

File tree

3 files changed

+85
-17
lines changed

3 files changed

+85
-17
lines changed

crun.1.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,14 @@ will skip the `setgroups` syscall that is used to either set the
306306
additional groups specified in the OCI configuration, or to reset the
307307
list of additional groups if none is specified.
308308

309+
## `run.oci.systemd.force_cgroup_v1=/PATH`
310+
311+
If the annotation `run.oci.systemd.force_cgroup_v1=/PATH` is present, then crun
312+
will override the specified mount point `/PATH` with a cgroup v1 mount
313+
made of a single hierarchy `none,name=systemd`.
314+
This is useful for running, on a cgroup v2 system, containers that use older
315+
versions of systemd that lack support for cgroup v2.
316+
309317
## `run.oci.timens_offset=ID SEC NSEC`
310318

311319
Specify the offset to be written to /proc/self/timens_offsets when creating

src/libcrun/cgroup.c

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ chown_cgroups (const char *path, uid_t uid, gid_t gid, libcrun_error_t *err)
444444

445445
dir = opendir (cgroup_path);
446446
if (UNLIKELY (dir == NULL))
447-
return crun_make_error (err, errno, "cannot opendir %s", cgroup_path);
447+
return crun_make_error (err, errno, "cannot opendir `%s`", cgroup_path);
448448

449449
dfd = dirfd (dir);
450450

@@ -458,7 +458,7 @@ chown_cgroups (const char *path, uid_t uid, gid_t gid, libcrun_error_t *err)
458458

459459
ret = fchownat (dfd, name, uid, gid, AT_SYMLINK_NOFOLLOW);
460460
if (UNLIKELY (ret < 0))
461-
return crun_make_error (err, errno, "cannot chown %s/%s", cgroup_path, name);
461+
return crun_make_error (err, errno, "cannot chown `%s/%s`", cgroup_path, name);
462462
}
463463

464464
return 0;
@@ -1839,25 +1839,25 @@ write_blkio_resources (int dirfd, bool cgroup2, runtime_spec_schema_config_linux
18391839
}
18401840

18411841
static int
1842-
write_network_resources (int dirfd, runtime_spec_schema_config_linux_resources_network *net, libcrun_error_t *err)
1842+
write_network_resources (int dirfd_netclass, int dirfd_netprio, runtime_spec_schema_config_linux_resources_network *net, libcrun_error_t *err)
18431843
{
18441844
char fmt_buf[128];
18451845
size_t len;
18461846
int ret;
18471847
if (net->class_id)
18481848
{
18491849
len = sprintf (fmt_buf, "%d", net->class_id);
1850-
ret = write_file_at (dirfd, "net_cls.classid", fmt_buf, len, err);
1850+
ret = write_file_at (dirfd_netclass, "net_cls.classid", fmt_buf, len, err);
18511851
if (UNLIKELY (ret < 0))
18521852
return ret;
18531853
}
18541854
if (net->priorities_len)
18551855
{
18561856
size_t i;
18571857
cleanup_close int fd = -1;
1858-
fd = openat (dirfd, "net_prio.ifpriomap", O_WRONLY);
1858+
fd = openat (dirfd_netprio, "net_prio.ifpriomap", O_WRONLY);
18591859
if (UNLIKELY (fd < 0))
1860-
return crun_make_error (err, errno, "open net_prio.ifpriomap");
1860+
return crun_make_error (err, errno, "open `net_prio.ifpriomap`");
18611861

18621862
for (i = 0; i < net->priorities_len; i++)
18631863
{
@@ -2364,16 +2364,24 @@ update_cgroup_v1_resources (runtime_spec_schema_config_linux_resources *resource
23642364

23652365
if (resources->network)
23662366
{
2367-
cleanup_free char *path_to_network = NULL;
2368-
cleanup_close int dirfd_network = -1;
2367+
cleanup_free char *path_to_netclass = NULL;
2368+
cleanup_close int dirfd_netclass = -1;
2369+
cleanup_free char *path_to_netprio = NULL;
2370+
cleanup_close int dirfd_netprio = -1;
23692371
runtime_spec_schema_config_linux_resources_network *network = resources->network;
23702372

2371-
xasprintf (&path_to_network, "/sys/fs/cgroup/net_cls,net_prio%s/", path);
2372-
dirfd_network = open (path_to_network, O_DIRECTORY | O_RDONLY);
2373-
if (UNLIKELY (dirfd_network < 0))
2374-
return crun_make_error (err, errno, "open %s", path_to_network);
2373+
xasprintf (&path_to_netclass, "/sys/fs/cgroup/net_cls%s/", path);
2374+
xasprintf (&path_to_netprio, "/sys/fs/cgroup/net_prio%s/", path);
23752375

2376-
ret = write_network_resources (dirfd_network, network, err);
2376+
dirfd_netclass = open (path_to_netclass, O_DIRECTORY | O_RDONLY);
2377+
if (UNLIKELY (dirfd_netclass < 0))
2378+
return crun_make_error (err, errno, "open `%s`", path_to_netclass);
2379+
2380+
dirfd_netprio = open (path_to_netprio, O_DIRECTORY | O_RDONLY);
2381+
if (UNLIKELY (dirfd_netprio < 0))
2382+
return crun_make_error (err, errno, "open `%s`", path_to_netprio);
2383+
2384+
ret = write_network_resources (dirfd_netclass, dirfd_netprio, network, err);
23772385
if (UNLIKELY (ret < 0))
23782386
return ret;
23792387
}

src/libcrun/linux.c

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,45 @@ has_mount_for (libcrun_container_t *container, const char *destination)
599599
return false;
600600
}
601601

602+
static int
603+
do_mount_cgroup_systemd_v1 (libcrun_container_t *container,
604+
const char *source,
605+
int targetfd,
606+
const char *target,
607+
unsigned long mountflags,
608+
libcrun_error_t *err)
609+
{
610+
int ret;
611+
cleanup_close int fd = -1;
612+
const char *subsystem = "systemd";
613+
cleanup_free char *subsystem_path = NULL;
614+
cleanup_close int tmpfsdirfd = -1;
615+
616+
mountflags = mountflags & ~MS_BIND;
617+
618+
ret = do_mount (container, source, targetfd, target, "tmpfs", mountflags, "size=1024k", 1, err);
619+
if (UNLIKELY (ret < 0))
620+
return ret;
621+
622+
/* Get a reference to the newly created cgroup directory. */
623+
tmpfsdirfd = open_mount_target (container, target, err);
624+
if (UNLIKELY (tmpfsdirfd < 0))
625+
return tmpfsdirfd;
626+
targetfd = tmpfsdirfd;
627+
628+
ret = mkdirat (targetfd, subsystem, 0755);
629+
if (UNLIKELY (ret < 0))
630+
return crun_make_error (err, errno, "mkdir `%s`", subsystem);
631+
632+
fd = openat (targetfd, subsystem, O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW);
633+
if (UNLIKELY (ret < 0))
634+
return crun_make_error (err, errno, "open `%s`", subsystem_path);
635+
636+
xasprintf (&subsystem_path, "%s/%s", target, subsystem);
637+
638+
return do_mount (container, "cgroup", fd, subsystem_path, "cgroup", mountflags, "none,name=systemd,xattr", true, err);
639+
}
640+
602641
static int
603642
do_mount_cgroup_v1 (libcrun_container_t *container,
604643
const char *source,
@@ -623,7 +662,7 @@ do_mount_cgroup_v1 (libcrun_container_t *container,
623662
if (UNLIKELY (subsystems == NULL))
624663
return -1;
625664

626-
ret = do_mount (container, source, targetfd, target, "tmpfs", mountflags, "size=1024k", 1, err);
665+
ret = do_mount (container, source, targetfd, target, "tmpfs", mountflags & ~MS_RDONLY, "size=1024k", true, err);
627666
if (UNLIKELY (ret < 0))
628667
return ret;
629668

@@ -658,7 +697,12 @@ do_mount_cgroup_v1 (libcrun_container_t *container,
658697

659698
it = strstr (subsystem, "name=");
660699
if (it)
661-
subsystem += 5;
700+
subsystem = it + 5;
701+
702+
if (strcmp (subsystem, "net_prio,net_cls") == 0)
703+
subsystem = "net_cls,net_prio";
704+
if (strcmp (subsystem, "cpuacct,cpu") == 0)
705+
subsystem = "cpu,cpuacct";
662706

663707
xasprintf (&source_subsystem, "/sys/fs/cgroup/%s", subsystem);
664708

@@ -1144,6 +1188,8 @@ do_mounts (libcrun_container_t *container, int rootfsfd, const char *rootfs, lib
11441188
int ret;
11451189
runtime_spec_schema_config_schema *def = container->container_def;
11461190
size_t rootfs_len = get_private_data (container)->rootfs_len;
1191+
const char *systemd_cgroup_v1 = find_annotation (container, "run.oci.systemd.force_cgroup_v1");
1192+
11471193
for (i = 0; i < def->mounts_len; i++)
11481194
{
11491195
cleanup_free char *data = NULL;
@@ -1157,7 +1203,6 @@ do_mounts (libcrun_container_t *container, int rootfsfd, const char *rootfs, lib
11571203
cleanup_close int targetfd = -1;
11581204

11591205
target = def->mounts[i]->destination;
1160-
11611206
while (*target == '/')
11621207
target++;
11631208

@@ -1275,7 +1320,14 @@ do_mounts (libcrun_container_t *container, int rootfsfd, const char *rootfs, lib
12751320
if (UNLIKELY (targetfd < 0))
12761321
return targetfd;
12771322

1278-
if (strcmp (type, "cgroup") == 0)
1323+
if (systemd_cgroup_v1 && strcmp (def->mounts[i]->destination, systemd_cgroup_v1) == 0)
1324+
{
1325+
/* Override the cgroup mount with a single named cgroup name=systemd. */
1326+
ret = do_mount_cgroup_systemd_v1 (container, source, targetfd, target, flags, err);
1327+
if (UNLIKELY (ret < 0))
1328+
return ret;
1329+
}
1330+
else if (strcmp (type, "cgroup") == 0)
12791331
{
12801332
ret = do_mount_cgroup (container, source, targetfd, target, flags, err);
12811333
if (UNLIKELY (ret < 0))

0 commit comments

Comments
 (0)