From 470c90dbab41a8959fdf0b5bc54122b1ba20e1cf Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 6 Aug 2015 11:01:22 -0700 Subject: [PATCH 1/2] Drop linux.devices Copying devices from the runtime host isn't particularly portable, and it's easy to mount any device nodes you need from the bundle itself (just like we'd mount any other files needed by the container). Signed-off-by: W. Trevor King --- config-linux.md | 22 ++++++++-------------- spec_linux.go | 2 -- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/config-linux.md b/config-linux.md index 1927dae55..392c8bf2a 100644 --- a/config-linux.md +++ b/config-linux.md @@ -55,21 +55,13 @@ within the container. ### Access to devices -Devices is an array specifying the list of devices from the host to make available in the container. -By providing a device name within the list the runtime should look up the same device on the host's `/dev` -and collect information about the device node so that it can be recreated for the container. The runtime -should not only create the device inside the container but ensure that the root user inside -the container has access rights for the device. +Devices required by the application should be supplied via the bundle filesystems and mounted via [mounts][]. +Bundle authors can create these files using [`mknod`][mknod] or by copying nodes from their local host. +For example: -```json - "devices": [ - "null", - "random", - "full", - "tty", - "zero", - "urandom" - ] +```shell +$ mknod --mode a=rw rootfs/dev/random c 1 8 +$ cp --archive /dev/tty rootfs/dev/tty ``` ## Linux control groups @@ -150,3 +142,5 @@ rootfsPropagation sets the rootfs's mount propagation. 
Its value is either slave **TODO:** security profiles +[mounts]: config.md#mount-configuration +[mknod]: http://linux.die.net/man/1/mknod diff --git a/spec_linux.go b/spec_linux.go index 3c82db4b4..98f2c8b5b 100644 --- a/spec_linux.go +++ b/spec_linux.go @@ -26,8 +26,6 @@ type Linux struct { Namespaces []Namespace `json:"namespaces"` // Capabilities are Linux capabilities that are kept for the container Capabilities []string `json:"capabilities"` - // Devices are a list of device nodes that are created and enabled for the container - Devices []string `json:"devices"` // RootfsPropagation is the rootfs mount propagation mode for the container RootfsPropagation string `json:"rootfsPropagation"` } From 335cd2135cba4543fce6685638a517f88c888ee2 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 6 Aug 2015 11:27:50 -0700 Subject: [PATCH 2/2] Add linux.resources.devices For specifying device cgroups independent of device creation. I also split the cgroups section into sections for each class (the earlier docs were very terse). I'll flesh these sections out in future commits if the devices addition sounds acceptable. Signed-off-by: W. Trevor King --- config-linux.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++--- spec_linux.go | 17 ++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/config-linux.md b/config-linux.md index 392c8bf2a..86ae92d64 100644 --- a/config-linux.md +++ b/config-linux.md @@ -66,9 +66,60 @@ $ cp --archive /dev/tty rootfs/dev/tty ## Linux control groups -Also known as cgroups, they are used to restrict resource usage for a container and handle -device access. cgroups provide controls to restrict cpu, memory, IO, and network for -the container. For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt) +Also known as cgroups, they are used to restrict resource usage for a container and handle device access. 
+For more information, see the [kernel cgroups documentation][cgroups]. +You can configure a container's cgroups via the "resources" field of the Linux configuration. + +### Disable out-of-memory killer + +FIXME + +### Memory + +FIXME + +### CPU + +FIXME + +### Block I/O + +FIXME + +### Devices + +Container-side devices are [mounted from the bundle filesystems][mount-devices]. +Bundle authors can set major and minor numbers, owner IDs, filesystem permissions, etc. by altering those filesystems. +However, you cannot pass cgroup information via the bundle filesystem, so bundle authors that need special device cgroups should use the "devices" field of the resource configuration. +The fields are discussed [in the kernel documentation][cgroups-devices]. +The entries are applied to the container in the order that they are listed in the configuration. + +```json + "devices": [ + { + "allow": false, + "type": "a", + "major": "*", + "minor": "*", + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": "1", + "minor": "3", + "access": "mr" + } + ] +``` + +### Huge page limits + +FIXME + +### Network + +FIXME ## Linux capabilities @@ -144,3 +195,6 @@ rootfsPropagation sets the rootfs's mount propagation. Its value is either slave [mounts]: config.md#mount-configuration [mknod]: http://linux.die.net/man/1/mknod +[cgroups]: https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +[cgroups-devices]: https://www.kernel.org/doc/Documentation/cgroups/devices.txt +[mount-devices]: #access-to-devices diff --git a/spec_linux.go b/spec_linux.go index 98f2c8b5b..9d111544c 100644 --- a/spec_linux.go +++ b/spec_linux.go @@ -100,6 +100,21 @@ type BlockIO struct { ThrottleWriteIOpsDevice string `json:"blkioThrottleWriteIopsDevice"` } +// Device rule for Linux cgroup management +type Device struct { + // Whether the device is allowed (true) or denied (false) + Allow bool `json:"allow"` + // a (all), c (char), or b (block). 
'all' means it applies to all + // types and all major and minor numbers + Type string `json:"type"` + // Major number. Either an integer or '*' for all. + Major string `json:"major"` + // Minor number. Either an integer or '*' for all. + Minor string `json:"minor"` + // a composition of r (read), w (write), and m (mknod). + Access string `json:"access"` +} + // Memory for Linux cgroup 'memory' resource management type Memory struct { // Memory limit (in bytes) @@ -150,6 +165,8 @@ type Resources struct { CPU CPU `json:"cpu"` // BlockIO restriction configuration BlockIO BlockIO `json:"blockIO"` + // Device configuration + Devices []Device `json:"devices"` // Hugetlb limit (in bytes) HugepageLimits []HugepageLimit `json:"hugepageLimits"` // Network restriction configuration