[lxc-devel] [lxd/master] lxd/cgroups: enable cgroup2 limit support

brauner on Github lxc-bot at linuxcontainers.org
Mon Dec 16 23:57:08 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20191216/9b2a2152/attachment-0001.bin>
-------------- next part --------------
From 954faaeebdf7884a5b818d1bbebdee6a7633d0e3 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Tue, 17 Dec 2019 00:30:25 +0100
Subject: [PATCH] lxd/cgroups: enable cgroup2 limit support

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/cgroup/init.go     | 88 +++++++++++++++++++++++++++++++++++++++++-
 lxd/container_lxc.go   | 44 ++++++++++-----------
 lxd/container_state.go |  4 +-
 lxd/daemon.go          |  5 +--
 lxd/device/disk.go     |  4 +-
 lxd/devices.go         | 10 ++---
 lxd/sys/cgroup.go      | 58 ----------------------------
 lxd/sys/os.go          | 15 ++-----
 8 files changed, 123 insertions(+), 105 deletions(-)
 delete mode 100644 lxd/sys/cgroup.go

diff --git a/lxd/cgroup/init.go b/lxd/cgroup/init.go
index 97bf705bbf..356f10a2ac 100644
--- a/lxd/cgroup/init.go
+++ b/lxd/cgroup/init.go
@@ -6,6 +6,9 @@ import (
 	"path/filepath"
 	"strings"
 
+	lxc "gopkg.in/lxc/go-lxc.v2"
+
+	"github.com/lxc/lxd/shared"
 	"github.com/lxc/lxd/shared/logger"
 )
 
@@ -13,6 +16,7 @@ var cgCgroup2SuperMagic int64 = 0x63677270
 
 var cgControllers = map[string]Backend{}
 var cgNamespace bool
+var lxcCgroup2Support bool
 
 // Layout determines the cgroup layout on this system
 type Layout int
@@ -44,7 +48,6 @@ func GetInfo() Info {
 	info := Info{}
 	info.Namespacing = cgNamespace
 	info.Layout = cgLayout
-
 	return info
 }
 
@@ -64,6 +67,76 @@ func (info *Info) Mode() string {
 	return "unknown"
 }
 
+// Supports reports whether the given cgroup control knob is present in
+// cgControllers. The term "knob" is used rather than "controller" because
+// that map holds whole controllers alongside individual features of a
+// controller, e.g. "blkio" is a controller whereas "blkio.weight" is a
+// feature of the blkio controller.
+func (info *Info) Supports(knob string) bool {
+	_, found := cgControllers[knob]
+	return found
+}
+
+// SupportsV1 indicates whether a given controller knob is available in the
+// legacy hierarchy. Once we're fully ported this should be removed.
+func (info *Info) SupportsV1(knob string) bool {
+	backend, found := cgControllers[knob]
+	if !found || backend != V1 {
+		return false
+	}
+
+	return true
+}
+
+// Log logs the cgroup layout and emits a warning for every knob that is missing.
+func (info *Info) Log() {
+	logger.Infof(" - cgroup layout: %s", info.Mode())
+
+	if !info.Supports("blkio") {
+		logger.Warnf(" - Couldn't find the CGroup blkio, I/O limits will be ignored")
+	}
+
+	if !info.Supports("blkio.weight") {
+		logger.Warnf(" - Couldn't find the CGroup blkio.weight, I/O weight limits will be ignored")
+	}
+
+	if !info.Supports("cpu") {
+		logger.Warnf(" - Couldn't find the CGroup CPU controller, CPU time limits will be ignored")
+	}
+
+	if !info.Supports("cpuacct") {
+		logger.Warnf(" - Couldn't find the CGroup CPUacct controller, CPU accounting will not be available")
+	}
+
+	if !info.Supports("cpuset") {
+		logger.Warnf(" - Couldn't find the CGroup CPUset controller, CPU pinning will be ignored")
+	}
+
+	if !info.Supports("devices") {
+		logger.Warnf(" - Couldn't find the CGroup devices controller, device access control won't work")
+	}
+
+	if !info.Supports("freezer") {
+		logger.Warnf(" - Couldn't find the CGroup freezer controller, pausing/resuming containers won't work")
+	}
+
+	if !info.Supports("memory") {
+		logger.Warnf(" - Couldn't find the CGroup memory controller, memory limits will be ignored")
+	}
+
+	if !info.Supports("net_prio") {
+		logger.Warnf(" - Couldn't find the CGroup network class controller, network limits will be ignored")
+	}
+
+	if !info.Supports("pids") {
+		logger.Warnf(" - Couldn't find the CGroup pids controller, process limits will be ignored")
+	}
+
+	if !info.Supports("memory.memsw.limit_in_bytes") {
+		logger.Warnf(" - Couldn't find the CGroup memory swap accounting, swap limits will be ignored")
+	}
+}
+
 func init() {
 	_, err := os.Stat("/proc/self/ns/cgroup")
 	if err == nil {
@@ -135,11 +208,24 @@ func init() {
 		}
 	}
 
+	// Check for additional legacy (cgroup v1) features; both are recorded as V1
+	val, ok := cgControllers["blkio"]
+	if ok && val == V1 && shared.PathExists("/sys/fs/cgroup/blkio/blkio.weight") {
+		cgControllers["blkio.weight"] = V1
+	}
+
+	val, ok = cgControllers["memory"]
+	if ok && val == V1 && shared.PathExists("/sys/fs/cgroup/memory/memory.memsw.limit_in_bytes") {
+		cgControllers["memory.memsw.limit_in_bytes"] = V1
+	}
+
 	if hasV1 && hasV2 {
 		cgLayout = CgroupsHybrid
+		lxcCgroup2Support = lxc.HasApiExtension("cgroup2")
 	} else if hasV1 {
 		cgLayout = CgroupsLegacy
 	} else if hasV2 {
 		cgLayout = CgroupsUnified
+		lxcCgroup2Support = lxc.HasApiExtension("cgroup2")
 	}
 }
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 726c852ae2..78e9982e3d 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -856,7 +856,7 @@ func (c *containerLXC) initLXC(config bool) error {
 	}
 
 	// Configure devices cgroup
-	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGInfo.Supports("devices") {
 		err = lxcSetConfigItem(cc, "lxc.cgroup.devices.deny", "a")
 		if err != nil {
 			return err
@@ -1091,7 +1091,7 @@ func (c *containerLXC) initLXC(config bool) error {
 	}
 
 	// Memory limits
-	if c.state.OS.CGroupMemoryController {
+	if c.state.OS.CGInfo.Supports("memory") {
 		memory := c.expandedConfig["limits.memory"]
 		memoryEnforce := c.expandedConfig["limits.memory.enforce"]
 		memorySwap := c.expandedConfig["limits.memory.swap"]
@@ -1125,7 +1125,7 @@ func (c *containerLXC) initLXC(config bool) error {
 					return err
 				}
 			} else {
-				if c.state.OS.CGroupSwapAccounting && (memorySwap == "" || shared.IsTrue(memorySwap)) {
+				if c.state.OS.CGInfo.Supports("memory.memsw.limit_in_bytes") && (memorySwap == "" || shared.IsTrue(memorySwap)) {
 					err = cg.SetMemoryMaxUsage(fmt.Sprintf("%d", valueInt))
 					if err != nil {
 						return err
@@ -1170,7 +1170,7 @@ func (c *containerLXC) initLXC(config bool) error {
 	cpuPriority := c.expandedConfig["limits.cpu.priority"]
 	cpuAllowance := c.expandedConfig["limits.cpu.allowance"]
 
-	if (cpuPriority != "" || cpuAllowance != "") && c.state.OS.CGroupCPUController {
+	if (cpuPriority != "" || cpuAllowance != "") && c.state.OS.CGInfo.Supports("cpu") {
 		cpuShares, cpuCfsQuota, cpuCfsPeriod, err := cgroup.ParseCPU(cpuAllowance, cpuPriority)
 		if err != nil {
 			return err
@@ -1199,7 +1199,7 @@ func (c *containerLXC) initLXC(config bool) error {
 	}
 
 	// Processes
-	if c.state.OS.CGroupPidsController {
+	if c.state.OS.CGInfo.Supports("pids") {
 		processes := c.expandedConfig["limits.processes"]
 		if processes != "" {
 			valueInt, err := strconv.ParseInt(processes, 10, 64)
@@ -1410,7 +1410,7 @@ func (c *containerLXC) deviceStaticShiftMounts(mounts []deviceConfig.MountEntryI
 func (c *containerLXC) deviceAddCgroupRules(cgroups []deviceConfig.RunConfigItem) error {
 	for _, rule := range cgroups {
 		// Only apply devices cgroup rules if container is running privileged and host has devices cgroup controller.
-		if strings.HasPrefix(rule.Key, "devices.") && (!c.isCurrentlyPrivileged() || c.state.OS.RunningInUserNS || !c.state.OS.CGroupDevicesController) {
+		if strings.HasPrefix(rule.Key, "devices.") && (!c.isCurrentlyPrivileged() || c.state.OS.RunningInUserNS || !c.state.OS.CGInfo.Supports("devices")) {
 			continue
 		}
 
@@ -2606,10 +2606,10 @@ func (c *containerLXC) Stop(stateful bool) error {
 	}
 
 	// Fork-bomb mitigation, prevent forking from this point on
-	if c.state.OS.CGroupPidsController {
+	if c.state.OS.CGInfo.Supports("pids") {
 		// Attempt to disable forking new processes
 		cg.SetMaxProcesses(0)
-	} else if c.state.OS.CGroupFreezerController {
+	} else if c.state.OS.CGInfo.Supports("freezer") {
 		// Attempt to freeze the container
 		freezer := make(chan bool, 1)
 		go func() {
@@ -2863,7 +2863,7 @@ func (c *containerLXC) Freeze() error {
 	}
 
 	// Check if the CGroup is available
-	if !c.state.OS.CGroupFreezerController {
+	if !c.state.OS.CGInfo.Supports("freezer") {
 		logger.Info("Unable to freeze container (lack of kernel support)", ctxMap)
 		return nil
 	}
@@ -2911,7 +2911,7 @@ func (c *containerLXC) Unfreeze() error {
 	}
 
 	// Check if the CGroup is available
-	if !c.state.OS.CGroupFreezerController {
+	if !c.state.OS.CGInfo.Supports("freezer") {
 		logger.Info("Unable to unfreeze container (lack of kernel support)", ctxMap)
 		return nil
 	}
@@ -4203,7 +4203,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 					}
 				}
 			} else if key == "limits.disk.priority" {
-				if !c.state.OS.CGroupBlkioController {
+				if !c.state.OS.CGInfo.Supports("blkio") {
 					continue
 				}
 
@@ -4228,7 +4228,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 				}
 			} else if key == "limits.memory" || strings.HasPrefix(key, "limits.memory.") {
 				// Skip if no memory CGroup
-				if !c.state.OS.CGroupMemoryController {
+				if !c.state.OS.CGInfo.Supports("memory") {
 					continue
 				}
 
@@ -4262,7 +4262,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 
 				// Store the old values for revert
 				oldMemswLimit := ""
-				if c.state.OS.CGroupSwapAccounting {
+				if c.state.OS.CGInfo.Supports("memory.memsw.limit_in_bytes") {
 					oldMemswLimit, err = cg.GetMemorySwapLimit()
 					if err != nil {
 						oldMemswLimit = ""
@@ -4292,7 +4292,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 				}
 
 				// Reset everything
-				if c.state.OS.CGroupSwapAccounting {
+				if c.state.OS.CGInfo.Supports("memory.memsw.limit_in_bytes") {
 					err = cg.SetMemorySwapMax("-1")
 					if err != nil {
 						revertMemory()
@@ -4319,7 +4319,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 						return err
 					}
 				} else {
-					if c.state.OS.CGroupSwapAccounting && (memorySwap == "" || shared.IsTrue(memorySwap)) {
+					if c.state.OS.CGInfo.Supports("memory.memsw.limit_in_bytes") && (memorySwap == "" || shared.IsTrue(memorySwap)) {
 						err = cg.SetMemoryMaxUsage(memory)
 						if err != nil {
 							revertMemory()
@@ -4384,7 +4384,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 				cgroup.TaskSchedulerTrigger("container", c.name, "changed")
 			} else if key == "limits.cpu.priority" || key == "limits.cpu.allowance" {
 				// Skip if no cpu CGroup
-				if !c.state.OS.CGroupCPUController {
+				if !c.state.OS.CGInfo.Supports("cpu") {
 					continue
 				}
 
@@ -4407,7 +4407,7 @@ func (c *containerLXC) Update(args db.InstanceArgs, userRequested bool) error {
 					return err
 				}
 			} else if key == "limits.processes" {
-				if !c.state.OS.CGroupPidsController {
+				if !c.state.OS.CGInfo.Supports("pids") {
 					continue
 				}
 
@@ -5732,7 +5732,7 @@ func (c *containerLXC) Exec(command []string, env map[string]string, stdin *os.F
 func (c *containerLXC) cpuState() api.InstanceStateCPU {
 	cpu := api.InstanceStateCPU{}
 
-	if !c.state.OS.CGroupCPUacctController {
+	if !c.state.OS.CGInfo.Supports("cpuacct") {
 		return cpu
 	}
 
@@ -5813,7 +5813,7 @@ func (c *containerLXC) memoryState() api.InstanceStateMemory {
 		return memory
 	}
 
-	if !c.state.OS.CGroupMemoryController {
+	if !c.state.OS.CGInfo.Supports("memory") {
 		return memory
 	}
 
@@ -5831,7 +5831,7 @@ func (c *containerLXC) memoryState() api.InstanceStateMemory {
 		memory.UsagePeak = valueInt
 	}
 
-	if c.state.OS.CGroupSwapAccounting {
+	if c.state.OS.CGInfo.Supports("memory.memsw.limit_in_bytes") {
 		// Swap in bytes
 		if memory.Usage > 0 {
 			value, err := cg.GetMemorySwapUsage()
@@ -5919,7 +5919,7 @@ func (c *containerLXC) processesState() int64 {
 		return 0
 	}
 
-	if c.state.OS.CGroupPidsController {
+	if c.state.OS.CGInfo.Supports("pids") {
 		cg, err := c.cgroup(nil)
 		if err != nil {
 			return 0
@@ -6509,7 +6509,7 @@ func (c *containerLXC) setNetworkPriority() error {
 	}
 
 	// Don't bother if the cgroup controller doesn't exist
-	if !c.state.OS.CGroupNetPrioController {
+	if !c.state.OS.CGInfo.Supports("net_prio") {
 		return nil
 	}
 
diff --git a/lxd/container_state.go b/lxd/container_state.go
index 2c55c022ad..f8ca4ace4a 100644
--- a/lxd/container_state.go
+++ b/lxd/container_state.go
@@ -190,7 +190,7 @@ func containerStatePut(d *Daemon, r *http.Request) response.Response {
 			return nil
 		}
 	case shared.Freeze:
-		if !d.os.CGroupFreezerController {
+		if !d.os.CGInfo.Supports("freezer") {
 			return response.BadRequest(fmt.Errorf("This system doesn't support freezing containers"))
 		}
 
@@ -200,7 +200,7 @@ func containerStatePut(d *Daemon, r *http.Request) response.Response {
 			return c.Freeze()
 		}
 	case shared.Unfreeze:
-		if !d.os.CGroupFreezerController {
+		if !d.os.CGInfo.Supports("freezer") {
 			return response.BadRequest(fmt.Errorf("This system doesn't support unfreezing containers"))
 		}
 
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 13ae45493a..fdbb60735f 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -28,7 +28,6 @@ import (
 	"gopkg.in/macaroon-bakery.v2/bakery/identchecker"
 	"gopkg.in/macaroon-bakery.v2/httpbakery"
 
-	"github.com/lxc/lxd/lxd/cgroup"
 	"github.com/lxc/lxd/lxd/cluster"
 	"github.com/lxc/lxd/lxd/daemon"
 	"github.com/lxc/lxd/lxd/db"
@@ -623,8 +622,7 @@ func (d *Daemon) init() error {
 		logger.Infof(" - unprivileged file capabilities: no")
 	}
 
-	cgroups := cgroup.GetInfo()
-	logger.Infof(" - cgroup layout: %s", cgroups.Mode())
+	d.os.CGInfo.Log()
 
 	// Detect shiftfs support.
 	if shared.IsTrue(os.Getenv("LXD_SHIFTFS_DISABLE")) {
@@ -648,6 +646,7 @@ func (d *Daemon) init() error {
 		"network_gateway_device_route",
 		"network_phys_macvlan_mtu",
 		"network_veth_router",
+		"cgroup2",
 	}
 	for _, extension := range lxcExtensions {
 		d.os.LXCFeatures[extension] = lxc.HasApiExtension(extension)
diff --git a/lxd/device/disk.go b/lxd/device/disk.go
index 1f10f2055c..bee1b635de 100644
--- a/lxd/device/disk.go
+++ b/lxd/device/disk.go
@@ -468,7 +468,7 @@ func (d *disk) generateLimits(runConf *deviceConfig.RunConfig) error {
 	// Disk priority limits.
 	diskPriority := d.instance.ExpandedConfig()["limits.disk.priority"]
 	if diskPriority != "" {
-		if d.state.OS.CGroupBlkioWeightController {
+		if d.state.OS.CGInfo.Supports("blkio.weight") {
 			priorityInt, err := strconv.Atoi(diskPriority)
 			if err != nil {
 				return err
@@ -503,7 +503,7 @@ func (d *disk) generateLimits(runConf *deviceConfig.RunConfig) error {
 	}
 
 	if hasDiskLimits {
-		if !d.state.OS.CGroupBlkioController {
+		if !d.state.OS.CGInfo.Supports("blkio") {
 			return fmt.Errorf("Cannot apply disk limits as blkio cgroup controller is missing")
 		}
 
diff --git a/lxd/devices.go b/lxd/devices.go
index fb8a84f733..b90bb87e75 100644
--- a/lxd/devices.go
+++ b/lxd/devices.go
@@ -229,7 +229,7 @@ func deviceTaskBalance(s *state.State) {
 	}
 
 	// Don't bother running when CGroup support isn't there
-	if !s.OS.CGroupCPUsetController {
+	if !s.OS.CGInfo.SupportsV1("cpuset") {
 		return
 	}
 
@@ -409,7 +409,7 @@ func deviceTaskBalance(s *state.State) {
 
 func deviceNetworkPriority(s *state.State, netif string) {
 	// Don't bother running when CGroup support isn't there
-	if !s.OS.CGroupNetPrioController {
+	if !s.OS.CGInfo.Supports("net_prio") {
 		return
 	}
 
@@ -452,7 +452,7 @@ func deviceEventListener(s *state.State) {
 				continue
 			}
 
-			if !s.OS.CGroupCPUsetController {
+			if !s.OS.CGInfo.SupportsV1("cpuset") {
 				continue
 			}
 
@@ -464,7 +464,7 @@ func deviceEventListener(s *state.State) {
 				continue
 			}
 
-			if !s.OS.CGroupNetPrioController {
+			if !s.OS.CGInfo.Supports("net_prio") {
 				continue
 			}
 
@@ -479,7 +479,7 @@ func deviceEventListener(s *state.State) {
 				continue
 			}
 
-			if !s.OS.CGroupCPUsetController {
+			if !s.OS.CGInfo.SupportsV1("cpuset") {
 				continue
 			}
 
diff --git a/lxd/sys/cgroup.go b/lxd/sys/cgroup.go
deleted file mode 100644
index 9faaf7b775..0000000000
--- a/lxd/sys/cgroup.go
+++ /dev/null
@@ -1,58 +0,0 @@
-// +build linux,cgo,!agent
-
-package sys
-
-import (
-	"fmt"
-
-	"github.com/lxc/lxd/shared"
-	"github.com/lxc/lxd/shared/logger"
-)
-
-// Detect CGroup support.
-func (s *OS) initCGroup() {
-	flags := []*bool{
-		&s.CGroupBlkioController,
-		&s.CGroupBlkioWeightController,
-		&s.CGroupCPUController,
-		&s.CGroupCPUacctController,
-		&s.CGroupCPUsetController,
-		&s.CGroupDevicesController,
-		&s.CGroupFreezerController,
-		&s.CGroupMemoryController,
-		&s.CGroupNetPrioController,
-		&s.CGroupPidsController,
-		&s.CGroupSwapAccounting,
-	}
-	for i, flag := range flags {
-		*flag = shared.PathExists("/sys/fs/cgroup/" + cGroups[i].path)
-		if !*flag {
-			logger.Warnf(cGroups[i].warn)
-		}
-	}
-}
-
-func cGroupMissing(name, message string) string {
-	return fmt.Sprintf("Couldn't find the CGroup %s, %s.", name, message)
-}
-
-func cGroupDisabled(name, message string) string {
-	return fmt.Sprintf("CGroup %s is disabled, %s.", name, message)
-}
-
-var cGroups = []struct {
-	path string
-	warn string
-}{
-	{"blkio", cGroupMissing("blkio", "I/O limits will be ignored")},
-	{"blkio/blkio.weight", cGroupMissing("blkio.weight", "I/O weight limits will be ignored")},
-	{"cpu", cGroupMissing("CPU controller", "CPU time limits will be ignored")},
-	{"cpuacct", cGroupMissing("CPUacct controller", "CPU accounting will not be available")},
-	{"cpuset", cGroupMissing("CPUset controller", "CPU pinning will be ignored")},
-	{"devices", cGroupMissing("devices controller", "device access control won't work")},
-	{"freezer", cGroupMissing("freezer controller", "pausing/resuming containers won't work")},
-	{"memory", cGroupMissing("memory controller", "memory limits will be ignored")},
-	{"net_prio", cGroupMissing("network class controller", "network limits will be ignored")},
-	{"pids", cGroupMissing("pids controller", "process limits will be ignored")},
-	{"memory/memory.memsw.limit_in_bytes", cGroupDisabled("memory swap accounting", "swap limits will be ignored")},
-}
diff --git a/lxd/sys/os.go b/lxd/sys/os.go
index feb5c8d184..4c7af7b6c8 100644
--- a/lxd/sys/os.go
+++ b/lxd/sys/os.go
@@ -8,6 +8,7 @@ import (
 
 	log "github.com/lxc/lxd/shared/log15"
 
+	"github.com/lxc/lxd/lxd/cgroup"
 	"github.com/lxc/lxd/lxd/util"
 	"github.com/lxc/lxd/shared"
 	"github.com/lxc/lxd/shared/idmap"
@@ -58,17 +59,7 @@ type OS struct {
 	AppArmorStacking  bool
 
 	// Cgroup features
-	CGroupBlkioController       bool
-	CGroupBlkioWeightController bool
-	CGroupCPUacctController     bool
-	CGroupCPUController         bool
-	CGroupCPUsetController      bool
-	CGroupDevicesController     bool
-	CGroupFreezerController     bool
-	CGroupMemoryController      bool
-	CGroupNetPrioController     bool
-	CGroupPidsController        bool
-	CGroupSwapAccounting        bool
+	CGInfo cgroup.Info
 
 	// Kernel features
 	NetnsGetifaddrs         bool
@@ -130,7 +121,7 @@ func (s *OS) Init() error {
 	s.RunningInUserNS = shared.RunningInUserNS()
 
 	s.initAppArmor()
-	s.initCGroup()
+	s.CGInfo = cgroup.GetInfo()
 
 	return nil
 }


More information about the lxc-devel mailing list