[lxc-devel] [lxc/master] cgroups: add unified hierarchy support
brauner on Github
lxc-bot at linuxcontainers.org
Wed Jan 31 15:51:25 UTC 2018
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180131/d20223ab/attachment.bin>
-------------- next part --------------
From d6337a5f9dc7311af168aa3d586fdf239f5a10d3 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:25:11 +0100
Subject: [PATCH 1/7] cgroups: get controllers on the unified hierarchy
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cgroups/cgfsng.c | 403 +++++++++++++++++++++++++++++++----------
src/lxc/cgroups/cgroup.h | 7 +
src/lxc/cgroups/cgroup_utils.c | 6 +-
src/lxc/cgroups/cgroup_utils.h | 4 -
4 files changed, 322 insertions(+), 98 deletions(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 86b39574d..4724fb5bc 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -78,7 +78,7 @@ struct hierarchy {
char *mountpoint;
char *base_cgroup;
char *fullcgpath;
- bool is_cgroup_v2;
+ int version;
};
/*
@@ -98,14 +98,17 @@ struct cgfsng_handler_data {
char *name; /* container name */
/* per-container cgroup information */
struct lxc_cgroup cgroup_meta;
+ cgroup_layout_t cgroup_layout;
};
/*
* @hierarchies - a NULL-terminated array of struct hierarchy, one per
- * hierarchy. No duplicates. First sufficient, writeable mounted
- * hierarchy wins
+ * legacy hierarchy. No duplicates. First sufficient, writeable
+ * mounted hierarchy wins
*/
struct hierarchy **hierarchies;
+struct hierarchy *unified;
+cgroup_layout_t cgroup_layout;
/*
* @cgroup_use - a copy of the lxc.cgroup.use
@@ -183,6 +186,7 @@ static bool string_in_list(char **list, const char *entry)
if (!list)
return false;
+
for (i = 0; list[i]; i++)
if (strcmp(list[i], entry) == 0)
return true;
@@ -220,8 +224,6 @@ static void must_append_controller(char **klist, char **nlist, char ***clist, ch
copy = must_copy_string(entry);
else if (string_in_list(klist, entry))
copy = must_copy_string(entry);
- else if (!strcmp(entry, "cgroup2"))
- copy = must_copy_string(entry);
else
copy = must_prefix_named(entry);
@@ -250,10 +252,21 @@ struct hierarchy *get_hierarchy(const char *c)
if (!hierarchies)
return NULL;
+
for (i = 0; hierarchies[i]; i++) {
+ if (!c) {
+ /* This is the empty unified hierarchy. */
+ if (hierarchies[i]->controllers &&
+ !hierarchies[i]->controllers[0])
+ return hierarchies[i];
+
+ return NULL;
+ }
+
if (string_in_list(hierarchies[i]->controllers, c))
return hierarchies[i];
}
+
return NULL;
}
@@ -278,7 +291,7 @@ static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
}
/* Slurp in a whole file */
-static char *read_file(char *fnam)
+static char *read_file(const char *fnam)
{
FILE *f;
char *line = NULL, *buf = NULL;
@@ -713,12 +726,14 @@ static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
static bool controller_found(struct hierarchy **hlist, char *entry)
{
int i;
+
if (!hlist)
return false;
for (i = 0; hlist[i]; i++)
if (string_in_list(hlist[i]->controllers, entry))
return true;
+
return false;
}
@@ -757,12 +772,13 @@ static bool all_controllers_found(void)
* options. But we simply assume that the mountpoint must be
* /sys/fs/cgroup/controller-list
*/
-static char **get_controllers(char **klist, char **nlist, char *line, int type)
+static char **get_controllers_on_hybrid_layout(char **klist, char **nlist,
+ char *line, int type)
{
/* the fourth field is /sys/fs/cgroup/comma-delimited-controller-list */
int i;
char *dup, *p2, *tok;
- char *p = line, *saveptr = NULL;
+ char *p = line, *saveptr = NULL, *sep = ",";
char **aret = NULL;
for (i = 0; i < 4; i++) {
@@ -778,6 +794,7 @@ static char **get_controllers(char **klist, char **nlist, char *line, int type)
CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p);
return NULL;
}
+
p += 15;
p2 = strchr(p, ' ');
if (!p2) {
@@ -786,30 +803,60 @@ static char **get_controllers(char **klist, char **nlist, char *line, int type)
}
*p2 = '\0';
- /* cgroup v2 does not have separate mountpoints for controllers */
- if (type == CGROUP_V2) {
- must_append_controller(klist, nlist, &aret, "cgroup2");
- return aret;
+ if (type == CGROUP_SUPER_MAGIC) {
+ /* strdup() here for v1 hierarchies. Otherwise strtok_r() will
+ * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
+ */
+ dup = strdup(p);
+ if (!dup)
+ return NULL;
+
+ for (tok = strtok_r(dup, sep, &saveptr); tok;
+ tok = strtok_r(NULL, sep, &saveptr))
+ must_append_controller(klist, nlist, &aret, tok);
+
+ free(dup);
}
+ *p2 = ' ';
+ return aret;
+}
- /* strdup() here for v1 hierarchies. Otherwise strtok_r() will destroy
- * mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
- */
- dup = strdup(p);
- if (!dup)
+/* Build a controller list that names no controllers.
+ *
+ * One slot is requested from append_null_to_list() and explicitly set to NULL.
+ * The callers in this file use the result for a cgroup2 hierarchy whose
+ * controller file produced no list.
+ */
+static char **cg_unified_make_empty_controller(void)
+{
+	char **list = NULL;
+	int slot = append_null_to_list((void ***)&list);
+
+	list[slot] = NULL;
+	return list;
+}
+
+static char **cg_unified_get_controllers(const char *file)
+{
+ char *buf, *tok;
+ char *saveptr = NULL, *sep = " \t\n";
+ char **aret = NULL;
+
+ buf = read_file(file);
+ if (!buf)
return NULL;
- for (tok = strtok_r(dup, ",", &saveptr); tok;
- tok = strtok_r(NULL, ",", &saveptr)) {
- must_append_controller(klist, nlist, &aret, tok);
+ for (tok = strtok_r(buf, sep, &saveptr); tok;
+ tok = strtok_r(NULL, sep, &saveptr)) {
+ int newentry;
+ char *copy;
+
+ newentry = append_null_to_list((void ***)&aret);
+ copy = must_copy_string(tok);
+ aret[newentry] = copy;
}
- free(dup);
+ free(buf);
return aret;
}
-/* Add a controller to our list of hierarchies */
-static void add_controller(char **clist, char *mountpoint, char *base_cgroup)
+static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
+ char *base_cgroup, int type)
{
struct hierarchy *new;
int newentry;
@@ -819,26 +866,24 @@ static void add_controller(char **clist, char *mountpoint, char *base_cgroup)
new->mountpoint = mountpoint;
new->base_cgroup = base_cgroup;
new->fullcgpath = NULL;
-
- /* record if this is the cgroup v2 hierarchy */
- if (clist && !strcmp(*clist, "cgroup2"))
- new->is_cgroup_v2 = true;
- else
- new->is_cgroup_v2 = false;
+ new->version = type;
newentry = append_null_to_list((void ***)&hierarchies);
hierarchies[newentry] = new;
+ return new;
}
/*
* Get a copy of the mountpoint from @line, which is a line from
* /proc/self/mountinfo
*/
-static char *get_mountpoint(char *line)
+static char *get_mountpoint_on_hybrid_layout(char *line)
{
int i;
- char *p = line, *sret;
+ char *p2;
size_t len;
+ char *p = line;
+ char *sret = NULL;
for (i = 0; i < 4; i++) {
p = strchr(p, ' ');
@@ -846,7 +891,15 @@ static char *get_mountpoint(char *line)
return NULL;
p++;
}
- /* we've already stuck a \0 after the mountpoint */
+
+ if (strncmp(p, "/sys/fs/cgroup/", 15))
+ return NULL;
+
+ p2 = strchr(p + 15, ' ');
+ if (!p2)
+ return NULL;
+ *p2 = '\0';
+
len = strlen(p);
sret = must_alloc(len + 1);
memcpy(sret, p, len);
@@ -893,10 +946,11 @@ static bool controller_in_clist(char *cgline, char *c)
tmp[len] = '\0';
for (tok = strtok_r(tmp, ",", &saveptr); tok;
- tok = strtok_r(NULL, ",", &saveptr)) {
+ tok = strtok_r(NULL, ",", &saveptr)) {
if (strcmp(tok, c) == 0)
return true;
}
+
return false;
}
@@ -904,24 +958,23 @@ static bool controller_in_clist(char *cgline, char *c)
* @basecginfo is a copy of /proc/$$/cgroup. Return the current
* cgroup for @controller
*/
-static char *get_current_cgroup(char *basecginfo, char *controller)
+static char *get_current_cgroup(char *basecginfo, char *controller, int type)
{
char *p = basecginfo;
- bool is_cgroup_v2;
- bool is_cgroup_v2_base_cgroup;
- is_cgroup_v2 = !strcmp(controller, "cgroup2");
- while (true) {
- is_cgroup_v2_base_cgroup = false;
+ for (;;) {
+ bool is_cgv2_base_cgroup = false;
+
/* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
- if (is_cgroup_v2 && (*p == '0'))
- is_cgroup_v2_base_cgroup = true;
+ if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
+ is_cgv2_base_cgroup = true;
p = strchr(p, ':');
if (!p)
return NULL;
p++;
- if (is_cgroup_v2_base_cgroup || controller_in_clist(p, controller)) {
+
+ if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
p = strchr(p, ':');
if (!p)
return NULL;
@@ -945,14 +998,16 @@ static void must_append_string(char ***list, char *entry)
(*list)[newentry] = copy;
}
-static void get_existing_subsystems(char ***klist, char ***nlist)
+static int get_existing_subsystems(char ***klist, char ***nlist)
{
FILE *f;
char *line = NULL;
size_t len = 0;
- if ((f = fopen("/proc/self/cgroup", "r")) == NULL)
- return;
+ f = fopen("/proc/self/cgroup", "r");
+ if (!f)
+ return -1;
+
while (getline(&line, &len, f) != -1) {
char *p, *p2, *tok, *saveptr = NULL;
p = strchr(line, ':');
@@ -977,7 +1032,7 @@ static void get_existing_subsystems(char ***klist, char ***nlist)
}
for (tok = strtok_r(p, ",", &saveptr); tok;
- tok = strtok_r(NULL, ",", &saveptr)) {
+ tok = strtok_r(NULL, ",", &saveptr)) {
if (strncmp(tok, "name=", 5) == 0)
must_append_string(nlist, tok);
else
@@ -987,6 +1042,7 @@ static void get_existing_subsystems(char ***klist, char ***nlist)
free(line);
fclose(f);
+ return 0;
}
static void trim(char *s)
@@ -1054,82 +1110,125 @@ static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d)
* At startup, parse_hierarchies finds all the info we need about
* cgroup mountpoints and current cgroups, and stores it in @d.
*/
-static bool parse_hierarchies(void)
+static bool cg_init_hybrid(void)
{
+ int ret;
+ char *basecginfo;
+ bool will_escape;
FILE *f;
- char * line = NULL, *basecginfo;
- char **klist = NULL, **nlist = NULL;
size_t len = 0;
+ char *line = NULL;
+ char **klist = NULL, **nlist = NULL;
/*
* Root spawned containers escape the current cgroup, so use init's
* cgroups as our base in that case.
*/
- if (geteuid())
- basecginfo = read_file("/proc/self/cgroup");
- else
+ will_escape = (geteuid() == 0);
+ if (will_escape)
basecginfo = read_file("/proc/1/cgroup");
+ else
+ basecginfo = read_file("/proc/self/cgroup");
if (!basecginfo)
return false;
- if ((f = fopen("/proc/self/mountinfo", "r")) == NULL) {
- CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n");
+ ret = get_existing_subsystems(&klist, &nlist);
+ if (ret < 0) {
+ CGFSNG_DEBUG("Failed to retrieve available cgroup v1 controllers\n");
+ free(basecginfo);
return false;
}
- get_existing_subsystems(&klist, &nlist);
+ f = fopen("/proc/self/mountinfo", "r");
+ if (!f) {
+ CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n");
+ return false;
+ }
if (lxc_cgfsng_debug)
lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
- /* we support simple cgroup mounts and lxcfs mounts */
while (getline(&line, &len, f) != -1) {
- char **controller_list = NULL;
- char *mountpoint, *base_cgroup;
- bool writeable;
int type;
+ bool writeable;
+ struct hierarchy *new;
+ char *mountpoint = NULL, *base_cgroup = NULL;
+ char **controller_list = NULL;
type = get_cgroup_version(line);
- if (type < 0)
+ if (type == 0)
continue;
- controller_list = get_controllers(klist, nlist, line, type);
- if (!controller_list)
+ if (type == CGROUP2_SUPER_MAGIC && unified)
continue;
- if (controller_list_is_dup(hierarchies, controller_list)) {
- free(controller_list);
- continue;
+ if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
+ if (type == CGROUP2_SUPER_MAGIC)
+ cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+ else if (type == CGROUP_SUPER_MAGIC)
+ cgroup_layout = CGROUP_LAYOUT_LEGACY;
+ } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+ if (type == CGROUP_SUPER_MAGIC)
+ cgroup_layout = CGROUP_LAYOUT_HYBRID;
+ } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) {
+ if (type == CGROUP2_SUPER_MAGIC)
+ cgroup_layout = CGROUP_LAYOUT_HYBRID;
}
- mountpoint = get_mountpoint(line);
+ controller_list = get_controllers_on_hybrid_layout(klist, nlist, line, type);
+ if (!controller_list && type == CGROUP_SUPER_MAGIC)
+ continue;
+
+ if (type == CGROUP_SUPER_MAGIC)
+ if (controller_list_is_dup(hierarchies, controller_list))
+ goto next;
+
+ mountpoint = get_mountpoint_on_hybrid_layout(line);
if (!mountpoint) {
CGFSNG_DEBUG("Failed parsing mountpoint from \"%s\"\n", line);
- free_string_list(controller_list);
- continue;
+ goto next;
}
- base_cgroup = get_current_cgroup(basecginfo, controller_list[0]);
+ if (type == CGROUP_SUPER_MAGIC)
+ base_cgroup = get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+ else
+ base_cgroup = get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
if (!base_cgroup) {
- CGFSNG_DEBUG("Failed to find current cgroup for controller \"%s\"\n", controller_list[0]);
- free_string_list(controller_list);
- free(mountpoint);
- continue;
+ CGFSNG_DEBUG("Failed to find current cgroup\n");
+ goto next;
}
trim(base_cgroup);
prune_init_scope(base_cgroup);
- if (type == CGROUP_V2)
+ if (type == CGROUP2_SUPER_MAGIC)
writeable = test_writeable_v2(mountpoint, base_cgroup);
else
writeable = test_writeable_v1(mountpoint, base_cgroup);
- if (!writeable) {
- free_string_list(controller_list);
- free(mountpoint);
- free(base_cgroup);
- continue;
+ if (!writeable)
+ goto next;
+
+ if (type == CGROUP2_SUPER_MAGIC) {
+ char *cgv2_ctrl_path;
+
+ cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+ "cgroup.controllers",
+ NULL);
+
+ controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+ free(cgv2_ctrl_path);
+ if (!controller_list)
+ controller_list = cg_unified_make_empty_controller();
}
- add_controller(controller_list, mountpoint, base_cgroup);
+ new = add_hierarchy(controller_list, mountpoint, base_cgroup, type);
+ if (type == CGROUP2_SUPER_MAGIC && !unified)
+ unified = new;
+
+ continue;
+
+ next:
+ free_string_list(controller_list);
+ free(mountpoint);
+ free(base_cgroup);
}
free_string_list(klist);
@@ -1154,9 +1253,106 @@ static bool parse_hierarchies(void)
return true;
}
-static bool collect_hierarchy_info(void)
+/* Check whether /sys/fs/cgroup itself is a cgroup2 filesystem.
+ *
+ * Returns CGROUP2_SUPER_MAGIC if it is, 0 if it is any other filesystem and
+ * -ENOMEDIUM if statfs() on /sys/fs/cgroup fails.
+ */
+static int cg_is_pure_unified(void)
+{
+	struct statfs fs;
+
+	if (statfs("/sys/fs/cgroup", &fs) < 0)
+		return -ENOMEDIUM;
+
+	return is_fs_type(&fs, CGROUP2_SUPER_MAGIC) ? CGROUP2_SUPER_MAGIC : 0;
+}
+
+/* Return a freshly allocated copy of the cgroup2 path from the "0::/..." entry
+ * of a /proc/<pid>/cgroup file, or NULL if the file cannot be read or has no
+ * such entry.
+ *
+ * When running as root (geteuid() == 0) the entry of pid 1 is used, otherwise
+ * our own. The returned string is passed through trim().
+ */
+static char *cg_get_current_cgroup_unified(void)
{
+	const char *cgroup_file;
+	char *info, *entry;
+	char *path = NULL;
+
+	cgroup_file = (geteuid() == 0) ? "/proc/1/cgroup" : "/proc/self/cgroup";
+	info = read_file(cgroup_file);
+	if (!info)
+		return NULL;
+
+	entry = strstr(info, "0::/");
+	if (entry) {
+		/* Skip the "0::" prefix so the copy starts at the leading '/'. */
+		path = copy_to_eol(entry + 3);
+		if (path)
+			trim(path);
+	}
+
+	free(info);
+	return path;
+}
+
+/* Try to initialize for a pure cgroup2 ("unified") layout.
+ *
+ * Returns CGROUP2_SUPER_MAGIC when /sys/fs/cgroup is a cgroup2 mount and the
+ * unified hierarchy has been registered, 0 when /sys/fs/cgroup is some other
+ * filesystem (nothing is registered in that case), -ENOMEDIUM when
+ * /sys/fs/cgroup cannot be inspected and -EINVAL when our current cgroup
+ * cannot be determined.
+ */
+static int cg_init_unified(void)
+{
+	int ret;
+	char *mountpoint, *subtree_path;
+	char **delegatable;
+	char *base_cgroup = NULL;
+
+	ret = cg_is_pure_unified();
+	if (ret == -ENOMEDIUM)
+		return -ENOMEDIUM;
+
+	if (ret != CGROUP2_SUPER_MAGIC)
+		return 0;
+
+	base_cgroup = cg_get_current_cgroup_unified();
+	if (!base_cgroup)
+		return -EINVAL;
+	prune_init_scope(base_cgroup);
+
+	/* We assume that we have already been given controllers to delegate
+	 * further down the hierarchy. If not it is up to the user to delegate
+	 * them to us.
+	 */
+	mountpoint = must_copy_string("/sys/fs/cgroup");
+	subtree_path = must_make_path(mountpoint, base_cgroup,
+				      "cgroup.subtree_control", NULL);
+	delegatable = cg_unified_get_controllers(subtree_path);
+	free(subtree_path);
+	if (!delegatable)
+		delegatable = cg_unified_make_empty_controller();
+	if (!delegatable[0])
+		CGFSNG_DEBUG("No controllers are enabled for delegation\n");
+
+	/* TODO: If the user requested specific controllers via lxc.cgroup.use
+	 * we should verify here. The reason I'm not doing it right now is that
+	 * I'm not convinced that lxc.cgroup.use will be the future since it is
+	 * a global property. I much rather have an option that lets you
+	 * request controllers per container.
+	 */
+
+	/* The hierarchy stores the mountpoint and base_cgroup pointers we hand
+	 * in, so they must not be freed here. Use the hierarchy add_hierarchy()
+	 * returns rather than assuming it ended up at index 0.
+	 */
+	unified = add_hierarchy(delegatable, mountpoint, base_cgroup,
+				CGROUP2_SUPER_MAGIC);
+
+	cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+	return CGROUP2_SUPER_MAGIC;
+}
+
+static bool cg_init(void)
+{
+ int ret;
const char *tmp;
+
errno = 0;
tmp = lxc_global_config_value("lxc.cgroup.use");
if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */
@@ -1165,7 +1361,14 @@ static bool collect_hierarchy_info(void)
}
cgroup_use = must_copy_string(tmp);
- return parse_hierarchies();
+ ret = cg_init_unified();
+ if (ret < 0)
+ return false;
+
+ if (ret == CGROUP2_SUPER_MAGIC)
+ return true;
+
+ return cg_init_hybrid();
}
static void *cgfsng_init(struct lxc_handler *handler)
@@ -1196,6 +1399,16 @@ static void *cgfsng_init(struct lxc_handler *handler)
}
d->cgroup_pattern = must_copy_string(cgroup_pattern);
+ d->cgroup_layout = cgroup_layout;
+ if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY)
+ TRACE("Running with legacy cgroup layout");
+ else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID)
+ TRACE("Running with hybrid cgroup layout");
+ else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+ TRACE("Running with unified cgroup layout");
+ else
+ WARN("Running with unknown cgroup layout");
+
if (lxc_cgfsng_debug)
lxc_cgfsng_print_debuginfo(d);
@@ -1343,7 +1556,7 @@ struct cgroup_ops *cgfsng_ops_init(void)
if (getenv("LXC_DEBUG_CGFSNG"))
lxc_cgfsng_debug = true;
- if (!collect_hierarchy_info())
+ if (!cg_init())
return NULL;
return &cgfsng_ops;
@@ -1529,7 +1742,7 @@ static int chown_cgroup_wrapper(void *data)
WARN("Error chmoding %s: %s", path, strerror(errno));
free(fullpath);
- if (!hierarchies[i]->is_cgroup_v2)
+ if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
continue;
fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
@@ -1679,7 +1892,7 @@ static int mount_cgroup_cgns_supported(int type, struct hierarchy *h, const char
if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
flags |= MS_RDONLY;
- if (!h->is_cgroup_v2) {
+ if (h->version != CGROUP2_SUPER_MAGIC) {
controllers = lxc_string_join(",", (const char **)h->controllers, false);
if (!controllers)
return -ENOMEM;
@@ -1902,25 +2115,33 @@ static bool cgfsng_get_hierarchies(int n, char ***out)
#define THAWED "THAWED"
#define THAWED_LEN (strlen(THAWED))
+/* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
+ * to be adapted.
+ */
static bool cgfsng_unfreeze(void *hdata)
{
+ int ret;
char *fullpath;
- struct hierarchy *h = get_hierarchy("freezer");
+ struct hierarchy *h;
+ h = get_hierarchy("freezer");
if (!h)
return false;
+
fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
- if (lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false) != 0) {
- free(fullpath);
- return false;
- }
+ ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false);
free(fullpath);
+ if (ret < 0)
+ return false;
+
return true;
}
static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
{
- struct hierarchy *h = get_hierarchy(subsystem);
+ struct hierarchy *h;
+
+ h = get_hierarchy(subsystem);
if (!h)
return NULL;
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index f17a6abe0..f409eee7c 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -32,6 +32,13 @@ struct lxc_handler;
struct lxc_conf;
struct lxc_list;
+/* Which kinds of cgroup filesystems were found while initializing the cgroup
+ * driver.
+ */
+typedef enum {
+ /* nothing detected (yet) */
+ CGROUP_LAYOUT_UNKNOWN = -1,
+ /* only cgroup v1 mounts were seen */
+ CGROUP_LAYOUT_LEGACY = 0,
+ /* both cgroup v1 and cgroup v2 mounts were seen */
+ CGROUP_LAYOUT_HYBRID = 1,
+ /* only cgroup v2 was seen */
+ CGROUP_LAYOUT_UNIFIED = 2,
+} cgroup_layout_t;
+
typedef enum {
CGFS,
CGMANAGER,
diff --git a/src/lxc/cgroups/cgroup_utils.c b/src/lxc/cgroups/cgroup_utils.c
index 6dda1a617..8e2a40eda 100644
--- a/src/lxc/cgroups/cgroup_utils.c
+++ b/src/lxc/cgroups/cgroup_utils.c
@@ -35,12 +35,12 @@
int get_cgroup_version(char *line)
{
if (is_cgroupfs_v1(line))
- return CGROUP_V1;
+ return CGROUP_SUPER_MAGIC;
if (is_cgroupfs_v2(line))
- return CGROUP_V2;
+ return CGROUP2_SUPER_MAGIC;
- return -1;
+ return 0;
}
bool is_cgroupfs_v1(char *line)
diff --git a/src/lxc/cgroups/cgroup_utils.h b/src/lxc/cgroups/cgroup_utils.h
index e9e4448a6..3a4726e5b 100644
--- a/src/lxc/cgroups/cgroup_utils.h
+++ b/src/lxc/cgroups/cgroup_utils.h
@@ -28,10 +28,6 @@
#include <stdbool.h>
#include <stdio.h>
-#define CGROUP_V1 0
-#define CGROUP_V2 1
-#define LXCFS_CGROUP 2
-
/* Retrieve the cgroup version of a given entry from /proc/<pid>/mountinfo. */
extern int get_cgroup_version(char *line);
From 0c3deb94f438ccf0d622440d80e27a41db465d8b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:33:17 +0100
Subject: [PATCH 2/7] cgroups: cgfsng_create: handle unified hierarchy
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cgroups/cgfsng.c | 96 ++++++++++++++++++++++++++++++++++++++++++------
src/lxc/utils.c | 27 ++++++++++++++
src/lxc/utils.h | 11 +++++-
3 files changed, 122 insertions(+), 12 deletions(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 4724fb5bc..24a020ff3 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1562,18 +1562,92 @@ struct cgroup_ops *cgfsng_ops_init(void)
return &cgfsng_ops;
}
+/* Enable all detected controllers along the path to the container's cgroup.
+ *
+ * For a cgroup2 hierarchy @h, "+<controller>" for every entry of
+ * h->controllers is written to the cgroup.subtree_control file of every
+ * component of @cgname except the last one, below h->mountpoint and
+ * h->base_cgroup.
+ *
+ * Returns true on success, or when there is nothing to do (not a cgroup2
+ * hierarchy, or no controllers known). Returns false if @cgname cannot be
+ * split or a write fails.
+ */
+static bool handle_unified_hierarchy(struct hierarchy *h, char *cgname)
+{
+	char **it;
+	size_t i, parts_len;
+	size_t full_len = 0;
+	char *add_controllers = NULL, *cgroup = NULL;
+	char **parts = NULL;
+	bool bret = false;
+
+	if (h->version != CGROUP2_SUPER_MAGIC)
+		return true;
+
+	/* An empty controller list leaves nothing to enable. Returning here
+	 * also keeps a NULL add_controllers away from the write and the "%s"
+	 * in the error message below.
+	 */
+	if (!h->controllers || !h->controllers[0])
+		return true;
+
+	/* For now we simply enable all controllers that we have detected by
+	 * creating a string like "+memory +pids +cpu +io".
+	 * TODO: In the near future we might want to support "-<controller>"
+	 * etc. but whether supporting semantics like this make sense will need
+	 * some thinking.
+	 */
+	for (it = h->controllers; *it; it++) {
+		size_t ctrl_len = strlen(*it);
+		size_t sep_len = full_len ? 1 : 0;
+
+		/* separator + '+' + name + terminating NUL */
+		add_controllers = must_realloc(add_controllers,
+					       full_len + sep_len + 1 + ctrl_len + 1);
+		if (sep_len)
+			add_controllers[full_len++] = ' ';
+		add_controllers[full_len++] = '+';
+		memcpy(add_controllers + full_len, *it, ctrl_len + 1);
+		full_len += ctrl_len;
+	}
+	/* full_len now equals strlen(add_controllers), so the terminating NUL
+	 * is not written to the control file.
+	 */
+
+	parts = lxc_string_split(cgname, '/');
+	if (!parts)
+		goto on_error;
+	parts_len = lxc_array_len((void **)parts);
+	/* The last component is skipped; only its ancestors are written to. */
+	if (parts_len > 0)
+		parts_len--;
+
+	cgroup = must_make_path(h->mountpoint, h->base_cgroup, NULL);
+	for (i = 0; i < parts_len; i++) {
+		int ret;
+		char *target;
+
+		cgroup = must_append_path(cgroup, parts[i], NULL);
+		target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
+		ret = lxc_write_to_file(target, add_controllers, full_len, false);
+		free(target);
+		if (ret < 0) {
+			SYSERROR("Could not enable \"%s\" controllers in the "
+				 "unified cgroup \"%s\"", add_controllers, cgroup);
+			goto on_error;
+		}
+	}
+
+	bret = true;
+
+on_error:
+	lxc_free_array((void **)parts, free);
+	free(add_controllers);
+	free(cgroup);
+	return bret;
+}
+
static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
{
+ int ret;
+
h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
if (dir_exists(h->fullcgpath)) { /* it must not already exist */
- ERROR("Path \"%s\" already existed.", h->fullcgpath);
+ ERROR("cgroup \"%s\" already existed", h->fullcgpath);
return false;
}
+
if (!handle_cpuset_hierarchy(h, cgname)) {
- ERROR("Failed to handle cgroupfs v1 cpuset controller.");
+ ERROR("Failed to handle cgroupfs v1 cpuset controller");
return false;
}
- return mkdir_p(h->fullcgpath, 0755) == 0;
+
+ ret = mkdir_p(h->fullcgpath, 0755);
+ if (ret < 0) {
+ ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
+ return false;
+ }
+
+ return handle_unified_hierarchy(h, cgname);
}
static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
@@ -1592,7 +1666,7 @@ static inline bool cgfsng_create(void *hdata)
{
int i;
size_t len;
- char *cgname, *offset, *tmp;
+ char *container_cgroup, *offset, *tmp;
int idx = 0;
struct cgfsng_handler_data *d = hdata;
@@ -1613,10 +1687,10 @@ static inline bool cgfsng_create(void *hdata)
return false;
}
len = strlen(tmp) + 5; /* leave room for -NNN\0 */
- cgname = must_alloc(len);
- strcpy(cgname, tmp);
+ container_cgroup = must_alloc(len);
+ strcpy(container_cgroup, tmp);
free(tmp);
- offset = cgname + len - 5;
+ offset = container_cgroup + len - 5;
again:
if (idx == 1000) {
@@ -1638,23 +1712,23 @@ static inline bool cgfsng_create(void *hdata)
}
}
for (i = 0; hierarchies[i]; i++) {
- if (!create_path_for_hierarchy(hierarchies[i], cgname)) {
+ if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) {
int j;
ERROR("Failed to create \"%s\"", hierarchies[i]->fullcgpath);
free(hierarchies[i]->fullcgpath);
hierarchies[i]->fullcgpath = NULL;
for (j = 0; j < i; j++)
- remove_path_for_hierarchy(hierarchies[j], cgname);
+ remove_path_for_hierarchy(hierarchies[j], container_cgroup);
idx++;
goto again;
}
}
/* Done */
- d->container_cgroup = cgname;
+ d->container_cgroup = container_cgroup;
return true;
out_free:
- free(cgname);
+ free(container_cgroup);
return false;
}
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index c7812fdac..a1fe7d4ec 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2307,6 +2307,33 @@ char *must_make_path(const char *first, ...)
return dest;
}
+/* Append every following argument, up to the NULL sentinel, to @first.
+ *
+ * A '/' is inserted before each piece that does not itself start with '/'.
+ * @first is grown with must_realloc(), so the caller must continue with the
+ * returned pointer instead of @first.
+ */
+char *must_append_path(char *first, ...)
+{
+	va_list ap;
+	const char *piece;
+	char *path = first;
+	size_t used = strlen(first);
+
+	va_start(ap, first);
+	for (piece = va_arg(ap, char *); piece; piece = va_arg(ap, char *)) {
+		size_t piece_len = strlen(piece);
+		size_t sep_len = (piece[0] != '/') ? 1 : 0;
+
+		path = must_realloc(path, used + sep_len + piece_len + 1);
+		if (sep_len)
+			path[used++] = '/';
+
+		/* Copies the terminating NUL along with the piece. */
+		memcpy(path + used, piece, piece_len + 1);
+		used += piece_len;
+	}
+	va_end(ap);
+
+	return path;
+}
+
char *must_copy_string(const char *entry)
{
char *ret;
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 223580edc..4d129d137 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -86,6 +86,14 @@
#define CAP_SYS_ADMIN 21
#endif
+#ifndef CGROUP_SUPER_MAGIC
+#define CGROUP_SUPER_MAGIC 0x27e0eb
+#endif
+
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#endif
+
/* Useful macros */
/* Maximum number for 64 bit integer is a string with 21 digits: 2^64 - 1 = 21 */
#define LXC_NUMSTRLEN64 21
@@ -529,7 +537,8 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *),
/* Concatenate all passed-in strings into one path. Do not fail. If any piece
* is not prefixed with '/', add a '/'.
*/
-extern char *must_make_path(const char *first, ...) __attribute__((sentinel));
+__attribute__((sentinel)) extern char *must_make_path(const char *first, ...);
+__attribute__((sentinel)) extern char *must_append_path(char *first, ...);
/* return copy of string @entry; do not fail. */
extern char *must_copy_string(const char *entry);
From c2aed66d0ff8440cae33b5c08ca055a234197c88 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:38:55 +0100
Subject: [PATCH 3/7] cgroups: cgfsng_attach: handle unified hierarchy
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cgroups/cgfsng.c | 94 +++++++++++++++++++++++++++++++++++++++++++++---
src/lxc/commands.c | 15 +++++---
2 files changed, 99 insertions(+), 10 deletions(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 24a020ff3..e0ae55a01 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2233,26 +2233,110 @@ static char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
return must_make_path(h->mountpoint, inpath, filename, NULL);
}
+/* Attach the process whose pid is the string @pidstr (@pidstr_len bytes) to
+ * the cgroup of container @name in the cgroup2 hierarchy @h.
+ *
+ * Technically, we're always at a delegation boundary here. (This is especially
+ * true when cgroup namespaces are available.) The reasoning is that in order
+ * for us to have been able to start a container in the first place the root
+ * cgroup must have been a leaf node. Now, either the container's init system
+ * has populated the cgroup and kept it as a leaf node, or it has created
+ * subtrees. In the former case we simply attach to the leaf node we created
+ * when we started the container. In the latter case we create our own cgroup
+ * ("lxc", "lxc-1", ... "lxc-999") for the attaching process.
+ *
+ * Returns 0 on success or when no cgroup path could be retrieved for the
+ * container (treated as "not running"), -1 on failure.
+ */
+static int cg_attach_unified(const struct hierarchy *h, const char *name,
+			     const char *lxcpath, const char *pidstr,
+			     size_t pidstr_len, const char *controller)
+{
+	int ret;
+	size_t len;
+	int fret = -1, idx = 0;
+	char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
+
+	container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+	/* not running */
+	if (!container_cgroup)
+		return 0;
+
+	base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
+	full_path = must_make_path(base_path, "cgroup.procs", NULL);
+	/* First try the container's own cgroup. */
+	ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false);
+	if (ret < 0 && errno != EBUSY)
+		goto on_error;
+
+	if (ret == 0)
+		goto on_success;
+
+	/* EBUSY: fall back to a cgroup of our own below the container's. */
+	free(full_path);
+
+	/* Room for the longest name we may generate plus "/cgroup.procs". */
+	len = strlen(base_path) + sizeof("/lxc-1000") - 1 +
+	      sizeof("/cgroup.procs") - 1;
+	full_path = must_alloc(len + 1);
+	do {
+		if (idx)
+			ret = snprintf(full_path, len + 1, "%s/lxc-%d",
+				       base_path, idx);
+		else
+			ret = snprintf(full_path, len + 1, "%s/lxc", base_path);
+		if (ret < 0 || (size_t)ret >= len + 1)
+			goto on_error;
+
+		ret = mkdir_p(full_path, 0755);
+		if (ret < 0 && errno != EEXIST)
+			goto on_error;
+
+		strcat(full_path, "/cgroup.procs");
+		/* Write exactly the pid string. "len" is the size of the path
+		 * buffer and would read past the end of @pidstr.
+		 */
+		ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false);
+		if (ret == 0)
+			goto on_success;
+
+		/* this is a non-leaf node */
+		if (errno != EBUSY)
+			goto on_error;
+
+	} while (++idx > 0 && idx < 1000);
+
+on_success:
+	/* Reaching here with idx == 1000 means every candidate was busy. */
+	if (idx < 1000)
+		fret = 0;
+
+on_error:
+	free(base_path);
+	free(container_cgroup);
+	free(full_path);
+
+	return fret;
+}
+
static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
{
+ int i, len, ret;
char pidstr[25];
- int i, len;
len = snprintf(pidstr, 25, "%d", pid);
if (len < 0 || len > 25)
return false;
for (i = 0; hierarchies[i]; i++) {
- char *path, *fullpath;
+ char *path;
+ char *fullpath = NULL;
struct hierarchy *h = hierarchies[i];
+ if (h->version == CGROUP2_SUPER_MAGIC) {
+ ret = cg_attach_unified(h, name, lxcpath, pidstr, len, h->controllers[0]);
+ if (ret < 0)
+ return false;
+
+ continue;
+ }
+
path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
- if (!path) /* not running */
+ /* not running */
+ if (!path)
continue;
fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
- free(path);
- if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
+ ret = lxc_write_to_file(fullpath, pidstr, len, false);
+ if (ret < 0) {
SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
free(fullpath);
return false;
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index b4d0e3979..eae06d9be 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -445,11 +445,16 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
struct lxc_cmd_rr cmd = {
.req = {
.cmd = LXC_CMD_GET_CGROUP,
- .datalen = strlen(subsystem) + 1,
.data = subsystem,
+ .datalen = 0,
},
};
+ cmd.req.data = subsystem;
+ cmd.req.datalen = 0;
+ if (subsystem)
+ cmd.req.datalen = strlen(subsystem) + 1;
+
ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
if (ret < 0)
return NULL;
@@ -469,10 +474,10 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
const char *path;
struct lxc_cmd_rsp rsp;
- if (req->datalen < 1)
- return -1;
-
- path = cgroup_get_cgroup(handler, req->data);
+ if (req->datalen > 0)
+ path = cgroup_get_cgroup(handler, req->data);
+ else
+ path = cgroup_get_cgroup(handler, NULL);
if (!path)
return -1;
From 0069cc619ed2c9fc2405c1a500f6c9c211ee4bba Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:41:53 +0100
Subject: [PATCH 4/7] cgroups: cgfsng_get: handle unified hierarchy
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cgroups/cgfsng.c | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index e0ae55a01..961a25fee 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2352,28 +2352,34 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
* Here we don't have a cgroup_data set up, so we ask the running
* container through the commands API for the cgroup path
*/
-static int cgfsng_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
+static int cgfsng_get(const char *filename, char *value, size_t len,
+ const char *name, const char *lxcpath)
{
- char *subsystem, *p, *path;
- struct hierarchy *h;
int ret = -1;
+ size_t controller_len;
+ char *controller, *p, *path;
+ struct hierarchy *h;
- subsystem = alloca(strlen(filename) + 1);
- strcpy(subsystem, filename);
- if ((p = strchr(subsystem, '.')) != NULL)
+ controller_len = strlen(filename);
+ controller = alloca(controller_len + 1);
+ strcpy(controller, filename);
+ p = strchr(controller, '.');
+ if (p)
*p = '\0';
- path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
- if (!path) /* not running */
+ path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+ /* not running */
+ if (!path)
return -1;
- h = get_hierarchy(subsystem);
+ h = get_hierarchy(controller);
if (h) {
- char *fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+ char *fullpath;
+
+ fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
ret = lxc_read_from_file(fullpath, value, len);
free(fullpath);
}
-
free(path);
return ret;
From 8777796839ab754e22b088dbf4e681d903c68ce8 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:42:19 +0100
Subject: [PATCH 5/7] cgroups: cgfsng_set: handle unified hierarchy
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cgroups/cgfsng.c | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 961a25fee..c94257c9f 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2390,28 +2390,34 @@ static int cgfsng_get(const char *filename, char *value, size_t len,
* Here we don't have a cgroup_data set up, so we ask the running
* container through the commands API for the cgroup path
*/
-static int cgfsng_set(const char *filename, const char *value, const char *name, const char *lxcpath)
+static int cgfsng_set(const char *filename, const char *value, const char *name,
+ const char *lxcpath)
{
- char *subsystem, *p, *path;
- struct hierarchy *h;
int ret = -1;
+ size_t controller_len;
+ char *controller, *p, *path;
+ struct hierarchy *h;
- subsystem = alloca(strlen(filename) + 1);
- strcpy(subsystem, filename);
- if ((p = strchr(subsystem, '.')) != NULL)
+ controller_len = strlen(filename);
+ controller = alloca(controller_len + 1);
+ strcpy(controller, filename);
+ p = strchr(controller, '.');
+ if (p)
*p = '\0';
- path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
- if (!path) /* not running */
+ path = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+ /* not running */
+ if (!path)
return -1;
- h = get_hierarchy(subsystem);
+ h = get_hierarchy(controller);
if (h) {
- char *fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+ char *fullpath;
+
+ fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
ret = lxc_write_to_file(fullpath, value, strlen(value), false);
free(fullpath);
}
-
free(path);
return ret;
From 54860ed02790c1c87bf1125c3b91b52ee1ee1c37 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:44:30 +0100
Subject: [PATCH 6/7] confile: add lxc.cgroup2.[controller].[property]
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
doc/lxc.container.conf.sgml.in | 30 +++++++---
src/lxc/conf.c | 33 ++++++++---
src/lxc/conf.h | 10 +++-
src/lxc/confile.c | 123 +++++++++++++++++++++++++++++++----------
4 files changed, 150 insertions(+), 46 deletions(-)
diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 3ae4bfd18..dfb7ba05d 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -1330,17 +1330,31 @@ dev/null proc/kcore none bind,relative 0 0
<variablelist>
<varlistentry>
<term>
- <option>lxc.cgroup.[subsystem name]</option>
+ <option>lxc.cgroup.[controller name]</option>
</term>
<listitem>
<para>
- specify the control group value to be set. The
- subsystem name is the literal name of the control group
- subsystem. The permitted names and the syntax of their
- values is not dictated by LXC, instead it depends on the
- features of the Linux kernel running at the time the
- container is started,
- eg. <option>lxc.cgroup.cpuset.cpus</option>
+ Specify the control group value to be set on a legacy cgroup
+ hierarchy. The controller name is the literal name of the control
+ group. The permitted names and the syntax of their values is not
+ dictated by LXC, instead it depends on the features of the Linux
+ kernel running at the time the container is started, eg.
+ <option>lxc.cgroup.cpuset.cpus</option>
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>
+ <option>lxc.cgroup2.[controller name]</option>
+ </term>
+ <listitem>
+ <para>
+ Specify the control group value to be set on the unified cgroup
+ hierarchy. The controller name is the literal name of the control
+ group. The permitted names and the syntax of their values is not
+ dictated by LXC, instead it depends on the features of the Linux
+ kernel running at the time the container is started, eg.
+ <option>lxc.cgroup2.memory.high</option>
</para>
</listitem>
</varlistentry>
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index a080bbd7e..9b6868940 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -2555,6 +2555,7 @@ struct lxc_conf *lxc_conf_init(void)
}
new->logfd = -1;
lxc_list_init(&new->cgroup);
+ lxc_list_init(&new->cgroup2);
lxc_list_init(&new->network);
lxc_list_init(&new->mount_list);
lxc_list_init(&new->caps);
@@ -3446,23 +3447,38 @@ int lxc_clear_config_keepcaps(struct lxc_conf *c)
return 0;
}
-int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
+int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
{
- struct lxc_list *it,*next;
- bool all = false;
+ char *global_token, *namespaced_token;
+ struct lxc_list *it, *next, *list;
const char *k = NULL;
+ bool all = false;
- if (strcmp(key, "lxc.cgroup") == 0)
+ if (version == CGROUP2_SUPER_MAGIC) {
+ global_token = "lxc.cgroup2";
+ namespaced_token = "lxc.cgroup2.";
+ list = &c->cgroup2;
+ } else if (version == CGROUP_SUPER_MAGIC) {
+ global_token = "lxc.cgroup";
+ namespaced_token = "lxc.cgroup.";
+ list = &c->cgroup;
+ } else {
+ return -1;
+ }
+
+ if (strcmp(key, global_token) == 0)
all = true;
- else if (strncmp(key, "lxc.cgroup.", sizeof("lxc.cgroup.") - 1) == 0)
- k = key + sizeof("lxc.cgroup.") - 1;
+ else if (strncmp(key, namespaced_token, sizeof(namespaced_token) - 1) == 0)
+ k = key + sizeof(namespaced_token) - 1;
else
return -1;
- lxc_list_for_each_safe(it, &c->cgroup, next) {
+ lxc_list_for_each_safe(it, list, next) {
struct lxc_cgroup *cg = it->elem;
+
if (!all && strcmp(cg->subsystem, k) != 0)
continue;
+
lxc_list_del(it);
free(cg->subsystem);
free(cg->value);
@@ -3680,7 +3696,8 @@ void lxc_conf_free(struct lxc_conf *conf)
lxc_seccomp_free(conf);
lxc_clear_config_caps(conf);
lxc_clear_config_keepcaps(conf);
- lxc_clear_cgroups(conf, "lxc.cgroup");
+ lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
+ lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
lxc_clear_hooks(conf, "lxc.hook");
lxc_clear_mount_entries(conf);
lxc_clear_idmaps(conf);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 1146a1d4f..2346b717f 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -52,6 +52,8 @@ typedef void * scmp_filter_ctx;
* programmer to specify the right subsystem.
* @subsystem : the targeted subsystem
* @value : the value to set
+ * @version : The version of the cgroup filesystem on which the controller
+ * resides.
*
* @controllers : The controllers to use for this container.
* @dir : The name of the directory containing the container's cgroup.
@@ -61,6 +63,7 @@ struct lxc_cgroup {
union {
/* information about a specific controller */
struct /* controller */ {
+ int version;
char *subsystem;
char *value;
};
@@ -282,7 +285,10 @@ struct lxc_conf {
int reboot;
signed long personality;
struct utsname *utsname;
- struct lxc_list cgroup;
+ struct {
+ struct lxc_list cgroup;
+ struct lxc_list cgroup2;
+ };
struct {
struct lxc_list id_map;
@@ -433,7 +439,7 @@ extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
extern void lxc_delete_tty(struct lxc_tty_info *tty_info);
extern int lxc_clear_config_caps(struct lxc_conf *c);
extern int lxc_clear_config_keepcaps(struct lxc_conf *c);
-extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key);
+extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version);
extern int lxc_clear_mount_entries(struct lxc_conf *c);
extern int lxc_clear_automounts(struct lxc_conf *c);
extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index 3deec58bf..fa4f84da9 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -81,6 +81,7 @@ lxc_config_define(apparmor_profile);
lxc_config_define(cap_drop);
lxc_config_define(cap_keep);
lxc_config_define(cgroup_controller);
+lxc_config_define(cgroup2_controller);
lxc_config_define(cgroup_dir);
lxc_config_define(console_logfile);
lxc_config_define(console_rotate);
@@ -153,6 +154,7 @@ static struct lxc_config_t config[] = {
{ "lxc.autodev", false, set_config_autodev, get_config_autodev, clr_config_autodev, },
{ "lxc.cap.drop", false, set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, },
{ "lxc.cap.keep", false, set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, },
+ { "lxc.cgroup2", false, set_config_cgroup2_controller, get_config_cgroup2_controller, clr_config_cgroup2_controller, },
{ "lxc.cgroup.dir", false, set_config_cgroup_dir, get_config_cgroup_dir, clr_config_cgroup_dir, },
{ "lxc.cgroup", false, set_config_cgroup_controller, get_config_cgroup_controller, clr_config_cgroup_controller, },
{ "lxc.console.buffer.logfile", false, set_config_console_buffer_logfile, get_config_console_buffer_logfile, clr_config_console_buffer_logfile, },
@@ -1374,28 +1376,33 @@ static int set_config_signal_stop(const char *key, const char *value,
return 0;
}
-static int set_config_cgroup_controller(const char *key, const char *value,
- struct lxc_conf *lxc_conf, void *data)
+static int __set_config_cgroup_controller(const char *key, const char *value,
+ struct lxc_conf *lxc_conf, int version)
{
- char *subkey;
- char *token = "lxc.cgroup.";
+ const char *subkey, *token;
+ size_t token_len;
struct lxc_list *cglist = NULL;
struct lxc_cgroup *cgelem = NULL;
if (lxc_config_value_empty(value))
- return lxc_clear_cgroups(lxc_conf, key);
-
- subkey = strstr(key, token);
- if (!subkey)
- return -1;
-
- if (!strlen(subkey))
- return -1;
+ return lxc_clear_cgroups(lxc_conf, key, version);
+
+ if (version == CGROUP2_SUPER_MAGIC) {
+ token = "lxc.cgroup2.";
+ token_len = 12;
+ } else if (version == CGROUP_SUPER_MAGIC) {
+ token = "lxc.cgroup.";
+ token_len = 11;
+ } else {
+ return -EINVAL;
+ }
- if (strlen(subkey) == strlen(token))
- return -1;
+ if (strncmp(key, token, token_len) != 0)
+ return -EINVAL;
- subkey += strlen(token);
+ subkey = key + token_len;
+ if (*subkey == '\0')
+ return -EINVAL;
cglist = malloc(sizeof(*cglist));
if (!cglist)
@@ -1407,14 +1414,21 @@ static int set_config_cgroup_controller(const char *key, const char *value,
memset(cgelem, 0, sizeof(*cgelem));
cgelem->subsystem = strdup(subkey);
- cgelem->value = strdup(value);
+ if (!cgelem->subsystem)
+ goto out;
- if (!cgelem->subsystem || !cgelem->value)
+ cgelem->value = strdup(value);
+ if (!cgelem->value)
goto out;
- cglist->elem = cgelem;
+ cgelem->version = version;
+
+ lxc_list_add_elem(cglist, cgelem);
- lxc_list_add_tail(&lxc_conf->cgroup, cglist);
+ if (version == CGROUP2_SUPER_MAGIC)
+ lxc_list_add_tail(&lxc_conf->cgroup2, cglist);
+ else
+ lxc_list_add_tail(&lxc_conf->cgroup, cglist);
return 0;
@@ -1429,6 +1443,21 @@ static int set_config_cgroup_controller(const char *key, const char *value,
return -1;
}
+static int set_config_cgroup_controller(const char *key, const char *value,
+ struct lxc_conf *lxc_conf, void *data)
+{
+ return __set_config_cgroup_controller(key, value, lxc_conf,
+ CGROUP_SUPER_MAGIC);
+}
+
+static int set_config_cgroup2_controller(const char *key, const char *value,
+ struct lxc_conf *lxc_conf, void *data)
+{
+ return __set_config_cgroup_controller(key, value, lxc_conf,
+ CGROUP2_SUPER_MAGIC);
+}
+
+
static int set_config_cgroup_dir(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{
@@ -2910,11 +2939,14 @@ static int get_config_selinux_context(const char *key, char *retv, int inlen,
* If you ask for 'lxc.cgroup", then all cgroup entries will be printed, in
* 'lxc.cgroup.subsystem.key = value' format.
*/
-static int get_config_cgroup_controller(const char *key, char *retv, int inlen,
- struct lxc_conf *c, void *data)
+static int __get_config_cgroup_controller(const char *key, char *retv,
+ int inlen, struct lxc_conf *c,
+ int version)
{
- struct lxc_list *it;
int len;
+ size_t namespaced_token_len;
+ char *global_token, *namespaced_token;
+ struct lxc_list *it;
int fulllen = 0;
bool get_all = false;
@@ -2923,10 +2955,22 @@ static int get_config_cgroup_controller(const char *key, char *retv, int inlen,
else
memset(retv, 0, inlen);
- if (!strcmp(key, "lxc.cgroup"))
+ if (version == CGROUP2_SUPER_MAGIC) {
+ global_token = "lxc.cgroup2";
+ namespaced_token = "lxc.cgroup2.";
+ namespaced_token_len = sizeof("lxc.cgroup2.") - 1;;
+ } else if (version == CGROUP_SUPER_MAGIC) {
+ global_token = "lxc.cgroup";
+ namespaced_token = "lxc.cgroup.";
+ namespaced_token_len = sizeof("lxc.cgroup.") - 1;;
+ } else {
+ return -1;
+ }
+
+ if (strcmp(key, global_token) == 0)
get_all = true;
- else if (!strncmp(key, "lxc.cgroup.", 11))
- key += 11;
+ else if (strncmp(key, namespaced_token, namespaced_token_len) == 0)
+ key += namespaced_token_len;
else
return -1;
@@ -2934,8 +2978,11 @@ static int get_config_cgroup_controller(const char *key, char *retv, int inlen,
struct lxc_cgroup *cg = it->elem;
if (get_all) {
- strprint(retv, inlen, "lxc.cgroup.%s = %s\n",
- cg->subsystem, cg->value);
+ if (version != cg->version)
+ continue;
+
+ strprint(retv, inlen, "%s.%s = %s\n",
+ global_token, cg->subsystem, cg->value);
} else if (!strcmp(cg->subsystem, key)) {
strprint(retv, inlen, "%s\n", cg->value);
}
@@ -2944,6 +2991,20 @@ static int get_config_cgroup_controller(const char *key, char *retv, int inlen,
return fulllen;
}
+static int get_config_cgroup_controller(const char *key, char *retv, int inlen,
+ struct lxc_conf *c, void *data)
+{
+ return __get_config_cgroup_controller(key, retv, inlen, c,
+ CGROUP_SUPER_MAGIC);
+}
+
+static int get_config_cgroup2_controller(const char *key, char *retv, int inlen,
+ struct lxc_conf *c, void *data)
+{
+ return __get_config_cgroup_controller(key, retv, inlen, c,
+ CGROUP2_SUPER_MAGIC);
+}
+
static int get_config_cgroup_dir(const char *key, char *retv, int inlen,
struct lxc_conf *lxc_conf, void *data)
{
@@ -3632,7 +3693,13 @@ static inline int clr_config_selinux_context(const char *key,
static inline int clr_config_cgroup_controller(const char *key,
struct lxc_conf *c, void *data)
{
- return lxc_clear_cgroups(c, key);
+ return lxc_clear_cgroups(c, key, CGROUP_SUPER_MAGIC);
+}
+
+static inline int clr_config_cgroup2_controller(const char *key,
+ struct lxc_conf *c, void *data)
+{
+ return lxc_clear_cgroups(c, key, CGROUP2_SUPER_MAGIC);
}
static int clr_config_cgroup_dir(const char *key, struct lxc_conf *lxc_conf,
From 6b38e644cb8d4942f16d9a82b72d56a72b9aa81d Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 31 Jan 2018 16:45:04 +0100
Subject: [PATCH 7/7] cgroups: handle limits on the unified hierarchy
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
src/lxc/cgroups/cgfs.c | 4 ++--
src/lxc/cgroups/cgfsng.c | 58 +++++++++++++++++++++++++++++++++++++++------
src/lxc/cgroups/cgmanager.c | 3 ++-
src/lxc/cgroups/cgroup.c | 2 +-
src/lxc/cgroups/cgroup.h | 5 ++--
5 files changed, 59 insertions(+), 13 deletions(-)
diff --git a/src/lxc/cgroups/cgfs.c b/src/lxc/cgroups/cgfs.c
index fc25bc9b5..89aec91f7 100644
--- a/src/lxc/cgroups/cgfs.c
+++ b/src/lxc/cgroups/cgfs.c
@@ -2525,14 +2525,14 @@ static bool cgfs_unfreeze(void *hdata)
return ret == 0;
}
-static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
+static bool cgroupfs_setup_limits(void *hdata, struct lxc_conf *conf,
bool with_devices)
{
struct cgfs_data *d = hdata;
if (!d)
return false;
- return do_setup_cgroup_limits(d, cgroup_conf, with_devices) == 0;
+ return do_setup_cgroup_limits(d, &conf->cgroup, with_devices) == 0;
}
static bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index c94257c9f..5ecc3f0d1 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2161,6 +2161,7 @@ static bool cgfsng_escape()
return true;
}
+/* TODO: handle the unified cgroup hierarchy */
static int cgfsng_num_hierarchies(void)
{
int i;
@@ -2171,15 +2172,15 @@ static int cgfsng_num_hierarchies(void)
return i;
}
+/* TODO: handle the unified cgroup hierarchy */
static bool cgfsng_get_hierarchies(int n, char ***out)
{
int i;
/* sanity check n */
- for (i = 0; i < n; i++) {
+ for (i = 0; i < n; i++)
if (!hierarchies[i])
return false;
- }
*out = hierarchies[i]->controllers;
@@ -2541,8 +2542,9 @@ static int lxc_cgroup_set_data(const char *filename, const char *value, struct c
return ret;
}
-static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
- bool do_devices)
+static bool __cgfsng_setup_limits_legacy(void *hdata,
+ struct lxc_list *cgroup_settings,
+ bool do_devices)
{
struct cgfsng_handler_data *d = hdata;
struct lxc_list *iterator, *sorted_cgroup_settings, *next;
@@ -2553,9 +2555,8 @@ static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
return true;
sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
- if (!sorted_cgroup_settings) {
+ if (!sorted_cgroup_settings)
return false;
- }
lxc_list_for_each(iterator, sorted_cgroup_settings) {
cg = iterator->elem;
@@ -2576,7 +2577,7 @@ static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
}
ret = true;
- INFO("cgroup has been setup");
+ INFO("Limits for the legacy cgroup hierarchies have been setup");
out:
lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
lxc_list_del(iterator);
@@ -2586,6 +2587,49 @@ static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
return ret;
}
+static bool __cgfsng_setup_limits_unified(void *hdata,
+ struct lxc_list *cgroup_settings)
+{
+ struct lxc_list *iterator;
+ struct hierarchy *h = unified;
+
+ if (lxc_list_empty(cgroup_settings))
+ return true;
+
+ if (!h)
+ return false;
+
+ lxc_list_for_each(iterator, cgroup_settings) {
+ int ret;
+ char *fullpath;
+ struct lxc_cgroup *cg = iterator->elem;
+
+ fullpath = must_make_path(h->fullcgpath, cg->subsystem, NULL);
+ ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false);
+ free(fullpath);
+ if (ret < 0) {
+ SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+ return false;
+ }
+ TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+ }
+
+ INFO("Limits for the unified cgroup hierarchy have been setup");
+ return true;
+}
+
+static bool cgfsng_setup_limits(void *hdata, struct lxc_conf *conf,
+ bool do_devices)
+{
+ bool bret;
+
+ bret = __cgfsng_setup_limits_legacy(hdata, &conf->cgroup, do_devices);
+ if (!bret)
+ return false;
+
+ return __cgfsng_setup_limits_unified(hdata, &conf->cgroup2);
+}
+
static struct cgroup_ops cgfsng_ops = {
.init = cgfsng_init,
.destroy = cgfsng_destroy,
diff --git a/src/lxc/cgroups/cgmanager.c b/src/lxc/cgroups/cgmanager.c
index dccc04c3c..c23443c9f 100644
--- a/src/lxc/cgroups/cgmanager.c
+++ b/src/lxc/cgroups/cgmanager.c
@@ -1479,11 +1479,12 @@ static bool cgm_unfreeze(void *hdata)
return ret;
}
-static bool cgm_setup_limits(void *hdata, struct lxc_list *cgroup_settings, bool do_devices)
+static bool cgm_setup_limits(void *hdata, struct lxc_conf *conf, bool do_devices)
{
struct cgm_data *d = hdata;
struct lxc_list *iterator, *sorted_cgroup_settings, *next;
struct lxc_cgroup *cg;
+ struct lxc_list *cgroup_settings = &conf->cgroup;
bool ret = false;
if (lxc_list_empty(cgroup_settings))
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
index 36a665b1c..1f78a6317 100644
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -150,7 +150,7 @@ bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
{
if (ops)
return ops->setup_limits(handler->cgroup_data,
- &handler->conf->cgroup, with_devices);
+ handler->conf, with_devices);
return false;
}
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index f409eee7c..d288b4c72 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -60,7 +60,7 @@ struct cgroup_ops {
int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath);
int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
bool (*unfreeze)(void *hdata);
- bool (*setup_limits)(void *hdata, struct lxc_list *cgroup_conf, bool with_devices);
+ bool (*setup_limits)(void *hdata, struct lxc_conf *conf, bool with_devices);
bool (*chown)(void *hdata, struct lxc_conf *conf);
bool (*attach)(const char *name, const char *lxcpath, pid_t pid);
bool (*mount_cgroup)(void *hdata, const char *root, int type);
@@ -80,7 +80,8 @@ extern bool cgroup_enter(struct lxc_handler *handler);
extern void cgroup_cleanup(struct lxc_handler *handler);
extern bool cgroup_create_legacy(struct lxc_handler *handler);
extern int cgroup_nrtasks(struct lxc_handler *handler);
-extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem);
+extern const char *cgroup_get_cgroup(struct lxc_handler *handler,
+ const char *subsystem);
extern bool cgroup_escape();
extern int cgroup_num_hierarchies();
extern bool cgroup_get_hierarchies(int i, char ***out);
More information about the lxc-devel
mailing list