[lxc-devel] [lxc/master] cgfsng: next generation filesystem-backed cgroup implementation
hallyn on Github
lxc-bot at linuxcontainers.org
Sat Mar 5 02:20:11 UTC 2016
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 678 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20160305/c6958c08/attachment.bin>
-------------- next part --------------
From 4843b5f805b6b77a1f37944c8e7fe6ca442753cd Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge.hallyn at ubuntu.com>
Date: Thu, 3 Mar 2016 10:31:23 -0800
Subject: [PATCH] cgfsng: next generation filesystem-backed cgroup
implementation
This makes simplifying assumptions: all usable cgroups must be
mounted under /sys/fs/cgroup/controller or /sys/fs/cgroup/contr1,contr2.
Currently this will only work with cgroup namespaces, because
lxc.mount.auto = cgroup is not implemented. So cgfsng_ops_init()
returns NULL if cgroup namespaces are not enabled.
Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
src/lxc/Makefile.am | 1 +
src/lxc/cgfs.c | 24 +-
src/lxc/cgfsng.c | 1466 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/lxc/cgmanager.c | 4 +-
src/lxc/cgroup.c | 9 +-
src/lxc/cgroup.h | 5 +-
src/lxc/criu.c | 18 +-
src/lxc/criu.h | 2 +-
src/lxc/utils.c | 22 +
src/lxc/utils.h | 1 +
10 files changed, 1518 insertions(+), 34 deletions(-)
create mode 100644 src/lxc/cgfsng.c
diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index 9f7a29e..0861fa3 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -89,6 +89,7 @@ liblxc_so_SOURCES = \
error.h error.c \
parse.c parse.h \
cgfs.c \
+ cgfsng.c \
cgroup.c cgroup.h \
lxc.h \
initutils.c initutils.h \
diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c
index 05e7bcf..c493d58 100644
--- a/src/lxc/cgfs.c
+++ b/src/lxc/cgfs.c
@@ -141,7 +141,6 @@ static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, cons
static bool cgroup_devices_has_allow_or_deny(struct cgfs_data *d, char *v, bool for_allow);
static int do_setup_cgroup_limits(struct cgfs_data *d, struct lxc_list *cgroup_settings, bool do_devices);
static int cgroup_recursive_task_count(const char *cgroup_path);
-static int count_lines(const char *fn);
static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
static bool init_cpuset_if_needed(struct cgroup_mount_point *mp, const char *path);
@@ -2116,7 +2115,7 @@ static int cgroup_recursive_task_count(const char *cgroup_path)
if (r >= 0)
n += r;
} else if (!strcmp(dent->d_name, "tasks")) {
- r = count_lines(sub_path);
+ r = lxc_count_file_lines(sub_path);
if (r >= 0)
n += r;
}
@@ -2128,25 +2127,6 @@ static int cgroup_recursive_task_count(const char *cgroup_path)
return n;
}
-static int count_lines(const char *fn)
-{
- FILE *f;
- char *line = NULL;
- size_t sz = 0;
- int n = 0;
-
- f = fopen_cloexec(fn, "r");
- if (!f)
- return -1;
-
- while (getline(&line, &sz, f) != -1) {
- n++;
- }
- free(line);
- fclose(f);
- return n;
-}
-
static int handle_cgroup_settings(struct cgroup_mount_point *mp,
char *cgroup_path)
{
@@ -2420,7 +2400,7 @@ static const char *cgfs_canonical_path(void *hdata)
return path;
}
-static bool cgfs_escape(void)
+static bool cgfs_escape(void *hdata)
{
struct cgroup_meta_data *md;
int i;
diff --git a/src/lxc/cgfsng.c b/src/lxc/cgfsng.c
new file mode 100644
index 0000000..4654343
--- /dev/null
+++ b/src/lxc/cgfsng.c
@@ -0,0 +1,1466 @@
+/*
+ * lxc: linux Container library
+ *
+ * Copyright © 2016 Canonical Ltd.
+ *
+ * Authors:
+ * Serge Hallyn <serge.hallyn at ubuntu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * cgfs-ng.c: this is a new, simplified implementation of a filesystem
+ * cgroup backend. The original cgfs.c was designed to be as flexible
+ * as possible. It would try to find cgroup filesystems no matter where
+ * or how you had them mounted, and deduce the most usable mount for
+ * each controller. It also was not designed for unprivileged use, as
+ * that was reserved for cgmanager.
+ *
+ * This new implementation assumes that cgroup filesystems are mounted
+ * under /sys/fs/cgroup/clist where clist is either the controller, or
+ * a comman-separated list of controllers.
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <grp.h>
+
+#include "log.h"
+#include "cgroup.h"
+#include "utils.h"
+#include "commands.h"
+
+lxc_log_define(lxc_cgfsng, lxc);
+
+static struct cgroup_ops cgfsng_ops;
+
+//#define EXTRADEBUG 1
+
+/*
+ * A descriptor for a mounted hierarchy
+ * @controllers: either NULL, or a null-terminated list of all
+ * the co-mounted controllers
+ * @mountpoint: the mountpoint we will use. It will be either
+ * /sys/fs/cgroup/controller or /sys/fs/cgroup/controllerlist
+ * @base_cgroup: the cgroup under which the container cgroup path
+ is created. This will be either the caller's cgroup (if not
+ root), or init's cgroup (if root).
+ */
+struct hierarchy {
+ char **controllers;
+ char *mountpoint;
+ char *base_cgroup;
+ char *fullcgpath;
+};
+
+/*
+ * The cgroup data which is attached to the lxc_handler.
+ * @hierarchies - a NULL-terminated array of struct hierarchy, one per
+ * hierarchy. No duplicates. First sufficient, writeable mounted
+ * hierarchy wins
+ * @cgroup_use - a copy of the lxc.cgroup.use
+ * @cgroup_pattern - a copy of the lxc.cgroup.pattern
+ * @container_cgroup - if not null, the cgroup which was created for
+ * the container. For each hierarchy, it is created under the
+ * @hierarchy->base_cgroup directory. Relative to the base_cgroup
+ * it is the same for all hierarchies.
+ * @name - the container name
+ */
+struct cgfsng_handler_data {
+ struct hierarchy **hierarchies;
+ char *cgroup_use;
+ char *cgroup_pattern;
+ char *container_cgroup; // cgroup we created for the container
+ char *name; // container name
+};
+
+static void free_string_list(char **clist)
+{
+ if (clist) {
+ int i;
+
+ for (i = 0; clist[i]; i++)
+ free(clist[i]);
+ free(clist);
+ }
+}
+
+/* Re-alllocate a pointer, do not fail */
+static void *must_realloc(void *orig, size_t sz)
+{
+ void *ret;
+
+ do {
+ ret = realloc(orig, sz);
+ } while (!ret);
+ return ret;
+}
+
+/* Allocate a pointer, do not fail */
+static void *must_alloc(size_t sz)
+{
+ return must_realloc(NULL, sz);
+}
+
+/* return copy of string @entry; do not fail. */
+static char *must_copy_string(const char *entry)
+{
+ char *ret;
+
+ if (!entry)
+ return NULL;
+ do {
+ ret = strdup(entry);
+ } while (!ret);
+ return ret;
+}
+
+/*
+ * This is a special case - return a copy of @entry
+ * prepending 'name='. I.e. turn systemd into name=systemd.
+ * Do not fail.
+ */
+static char *must_prefix_named(char *entry)
+{
+ char *ret;
+ size_t len = strlen(entry);
+
+ ret = must_alloc(len + 6);
+ snprintf(ret, len + 6, "name=%s", entry);
+ return ret;
+}
+
+/*
+ * Given a pointer to a null-terminated array of pointers, realloc to
+ * add one entry, and point the new entry to NULL. Do not fail. Return
+ * the index to the second-to-last entry - that is, the one which is
+ * now available for use (keeping the list null-terminated).
+ */
+static int append_null_to_list(void ***list)
+{
+ int newentry = 0;
+
+ if (*list)
+ for (; (*list)[newentry]; newentry++);
+
+ *list = must_realloc(*list, (newentry + 2) * sizeof(void **));
+ (*list)[newentry + 1] = NULL;
+ return newentry;
+}
+
+/*
+ * Given a null-terminated array of strings, check whether @entry
+ * is one of the strings
+ */
+static bool string_in_list(char **list, const char *entry)
+{
+ int i;
+
+ if (!list)
+ return false;
+ for (i = 0; list[i]; i++)
+ if (strcmp(list[i], entry) == 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * append an entry to the clist. Do not fail.
+ * *clist must be NULL the first time we are called.
+ *
+ * We also handle named subsystems here. Any controller which is not a
+ * kernel subsystem, we prefix 'name='. Any which is both a kernel and
+ * named subsystem, we refuse to use because we're not sure which we
+ * have here. (TODO - we could work around this in some cases by just
+ * remounting to be unambiguous, or by comparing mountpoint contents
+ * with current cgroup)
+ *
+ * The last entry will always be NULL.
+ */
+static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
+{
+ int newentry;
+ char *copy;
+
+ if (string_in_list(klist, entry) && string_in_list(nlist, entry)) {
+ ERROR("Refusing to use ambiguous controller '%s'", entry);
+ ERROR("It is both a named and kernel subsystem");
+ return;
+ }
+
+ newentry = append_null_to_list((void ***)clist);
+
+ if (strncmp(entry, "name=", 5) == 0)
+ copy = must_copy_string(entry);
+ else if (string_in_list(klist, entry))
+ copy = must_copy_string(entry);
+ else
+ copy = must_prefix_named(entry);
+
+ (*clist)[newentry] = copy;
+}
+
+static void free_hierarchies(struct hierarchy **hlist)
+{
+ if (hlist) {
+ int i;
+
+ for (i = 0; hlist[i]; i++) {
+ free(hlist[i]->mountpoint);
+ free(hlist[i]->base_cgroup);
+ free(hlist[i]->fullcgpath);
+ free_string_list(hlist[i]->controllers);
+ }
+ free(hlist);
+ }
+}
+
+static void free_handler_data(struct cgfsng_handler_data *d)
+{
+ free_hierarchies(d->hierarchies);
+ free(d->cgroup_use);
+ free(d->cgroup_pattern);
+ free(d->container_cgroup);
+ free(d->name);
+ free(d);
+}
+
+/*
+ * Given a handler's cgroup data, return the struct hierarchy for the
+ * controller @c, or NULL if there is none.
+ */
+struct hierarchy *get_hierarchy(struct cgfsng_handler_data *d, const char *c)
+{
+ int i;
+
+ if (!d || !d->hierarchies)
+ return NULL;
+ for (i = 0; d->hierarchies[i]; i++) {
+ if (string_in_list(d->hierarchies[i]->controllers, c))
+ return d->hierarchies[i];
+ }
+ return NULL;
+}
+
+/*
+ * Given two null-terminated lists of strings, return true if any string
+ * is in both.
+ */
+static bool controller_lists_intersect(char **l1, char **l2)
+{
+ int i;
+
+ if (!l1 || !l2)
+ return false;
+
+ for (i = 0; l1[i]; i++) {
+ if (string_in_list(l2, l1[i]))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * For a null-terminated list of controllers @clist, return true if any of
+ * those controllers is already listed the null-terminated list of
+ * hierarchies @hlist. Realistically, if one is present, all must be present.
+ */
+static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
+{
+ int i;
+
+ if (!hlist)
+ return false;
+ for (i = 0; hlist[i]; i++)
+ if (controller_lists_intersect(hlist[i]->controllers, clist))
+ return true;
+ return false;
+
+}
+
+/*
+ * Return true if the controller @entry is found in the null-terminated
+ * list of hierarchies @hlist
+ */
+static bool controller_found(struct hierarchy **hlist, char *entry)
+{
+ int i;
+ if (!hlist)
+ return false;
+
+ for (i = 0; hlist[i]; i++)
+ if (string_in_list(hlist[i]->controllers, entry))
+ return true;
+ return false;
+}
+
+/*
+ * Return true if all of the controllers which we require have been
+ * found. The required list is systemd, freezer, and anything in
+ * lxc.cgroup.use.
+ */
+static bool all_controllers_found(struct cgfsng_handler_data *d)
+{
+ char *p, *saveptr = NULL;
+ struct hierarchy ** hlist = d->hierarchies;
+
+ if (!controller_found(hlist, "name=systemd")) {
+ ERROR("no systemd controller mountpoint found");
+ return false;
+ }
+ if (!controller_found(hlist, "freezer")) {
+ ERROR("no freezer controller mountpoint found");
+ return false;
+ }
+
+ if (!d->cgroup_use)
+ return true;
+ for (p = strtok_r(d->cgroup_use, ",", &saveptr); p;
+ p = strtok_r(NULL, ",", &saveptr)) {
+ if (!controller_found(hlist, p)) {
+ ERROR("no %s controller mountpoint found", p);
+ return false;
+ }
+ }
+ return true;
+}
+
+/* Return true if the fs type is fuse.lxcfs */
+static bool is_lxcfs(const char *line)
+{
+ char *p = strstr(line, " - ");
+ if (!p)
+ return false;
+ return strncmp(p, " - fuse.lxcfs ", 14);
+}
+
+/*
+ * Get the controllers from a mountinfo line
+ * There are other ways we could get this info. For lxcfs, field 3
+ * is /cgroup/controller-list. For cgroupfs, we could parse the mount
+ * options. But we simply assume that the mountpoint must be
+ * /sys/fs/cgroup/controller-list
+ */
+static char **get_controllers(char **klist, char **nlist, char *line)
+{
+ // the fourth field is /sys/fs/cgroup/comma-delimited-controller-list
+ int i;
+ char *p = line, *p2, *tok, *saveptr = NULL;
+ char **aret = NULL;
+
+ for (i = 0; i < 4; i++) {
+ p = index(p, ' ');
+ if (!p)
+ return NULL;
+ p++;
+ }
+ if (!p)
+ return NULL;
+ /* note - if we change how mountinfo works, then our caller
+ * will need to verify /sys/fs/cgroup/ in this field */
+ if (strncmp(p, "/sys/fs/cgroup/", 15) != 0)
+ return NULL;
+ p += 15;
+ p2 = index(p, ' ');
+ if (!p2) {
+ ERROR("corrupt mountinfo");
+ return NULL;
+ }
+ *p2 = '\0';
+ for (tok = strtok_r(p, ",", &saveptr); tok;
+ tok = strtok_r(NULL, ",", &saveptr)) {
+ must_append_controller(klist, nlist, &aret, tok);
+ }
+
+ return aret;
+}
+
+/* return true if the fstype is cgroup */
+static bool is_cgroupfs(char *line)
+{
+ char *p = strstr(line, " - ");
+ if (!p)
+ return false;
+ return strncmp(p, " - cgroup ", 10);
+}
+
+/* Add a controller to our list of hierarchies */
+static void add_controller(struct cgfsng_handler_data *d, char **clist,
+ char *mountpoint, char *base_cgroup)
+{
+ struct hierarchy *new;
+ int newentry;
+
+ new = must_alloc(sizeof(*new));
+ new->controllers = clist;
+ new->mountpoint = mountpoint;
+ new->base_cgroup = base_cgroup;
+ new->fullcgpath = NULL;
+
+ newentry = append_null_to_list((void ***)&d->hierarchies);
+ d->hierarchies[newentry] = new;
+}
+
+/*
+ * Get a copy of the mountpoint from @line, which is a line from
+ * /proc/self/mountinfo
+ */
+static char *get_mountpoint(char *line)
+{
+ int i;
+ char *p = line, *sret;
+ size_t len;
+
+ for (i = 0; i < 4; i++) {
+ p = index(p, ' ');
+ if (!p)
+ return NULL;
+ p++;
+ }
+ /* we've already stuck a \0 after the mountpoint */
+ len = strlen(p);
+ sret = must_alloc(len + 1);
+ memcpy(sret, p, len);
+ sret[len] = '\0';
+ return sret;
+}
+
+/*
+ * Given a multi-line string, return a null-terminated copy of the
+ * current line.
+ */
+static char *copy_to_eol(char *p)
+{
+ char *p2 = index(p, '\n'), *sret;
+ size_t len;
+
+ if (!p2)
+ return NULL;
+
+ len = p2 - p;
+ sret = must_alloc(len + 1);
+ memcpy(sret, p, len);
+ sret[len] = '\0';
+ return sret;
+}
+
+/*
+ * cgline: pointer to character after the first ':' in a line in a
+ * \n-terminated /proc/self/cgroup file. Check whether * controller c is
+ * present.
+ */
+static bool controller_in_clist(char *cgline, char *c)
+{
+ char *tok, *saveptr = NULL, *eol, *tmp;
+ size_t len;
+
+ eol = index(cgline, ':');
+ if (!eol)
+ return false;
+
+ len = eol - cgline;
+ tmp = alloca(len + 1);
+ memcpy(tmp, cgline, len);
+ tmp[len] = '\0';
+
+ for (tok = strtok_r(tmp, ",", &saveptr); tok;
+ tok = strtok_r(NULL, ",", &saveptr)) {
+ if (strcmp(tok, c) == 0)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * @basecginfo is a copy of /proc/$$/cgroup. Return the current
+ * cgroup for @controller
+ */
+static char *get_current_cgroup(char *basecginfo, char *controller)
+{
+ char *p = basecginfo;
+
+ while (1) {
+ p = index(p, ':');
+ if (!p)
+ return NULL;
+ p++;
+ if (controller_in_clist(p, controller)) {
+ p = index(p, ':');
+ if (!p)
+ return NULL;
+ p++;
+ return copy_to_eol(p);
+ }
+
+ p = index(p, '\n');
+ if (!p)
+ return NULL;
+ p++;
+ }
+}
+
+static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
+{
+ size_t full = oldlen + newlen;
+
+ *dest = must_realloc(*dest, full + 1);
+
+ strcat(*dest, new);
+}
+
+/* Slurp in a whole file */
+static char *read_file(char *fnam)
+{
+ FILE *f;
+ char *line = NULL, *buf = NULL;
+ size_t len = 0, fulllen = 0;
+
+ f = fopen(fnam, "r");
+ if (!f)
+ return NULL;
+ while (getline(&line, &len, f) != -1) {
+ append_line(&buf, fulllen, line, len);
+ fulllen += len;
+ }
+ fclose(f);
+ free(line);
+ return buf;
+}
+
+static char *must_make_path(const char *first, ...) __attribute__((sentinel));
+
+/*
+ * Given a hierarchy @mountpoint and base @path, verify that we can create
+ * directories underneath it.
+ */
+static bool test_writeable(char *mountpoint, char *path)
+{
+ char *fullpath = must_make_path(mountpoint, path, NULL);
+ int ret;
+
+ ret = access(fullpath, W_OK);
+ free(fullpath);
+ return ret == 0;
+}
+
+static void must_append_string(char ***list, char *entry)
+{
+ int newentry = append_null_to_list((void ***)list);
+ char *copy;
+
+ copy = must_copy_string(entry);
+ (*list)[newentry] = copy;
+}
+
+static void get_existing_subsystems(char ***klist, char ***nlist)
+{
+ FILE *f;
+ char *line = NULL;
+ size_t len = 0;
+
+ if ((f = fopen("/proc/self/cgroup", "r")) == NULL)
+ return;
+ while (getline(&line, &len, f) != -1) {
+ char *p, *p2, *tok, *saveptr = NULL;
+ p = index(line, ':');
+ if (!p)
+ continue;
+ p++;
+ p2 = index(p, ':');
+ if (!p2)
+ continue;
+ *p2 = '\0';
+ for (tok = strtok_r(p, ",", &saveptr); tok;
+ tok = strtok_r(NULL, ",", &saveptr)) {
+ if (strncmp(tok, "name=", 5) == 0)
+ must_append_string(nlist, tok);
+ else
+ must_append_string(klist, tok);
+ }
+ }
+
+ free(line);
+ fclose(f);
+}
+
+static void trim(char *s)
+{
+ size_t len = strlen(s);
+ while (s[len-1] == '\n')
+ s[--len] = '\0';
+}
+
+#if EXTRADEBUG
+static void print_init_debuginfo(struct cgfsng_handler_data *d)
+{
+ int i;
+ printf("Cgroup information:\n");
+ printf(" container name: %s\n", d->name);
+ printf(" lxc.cgroup.use: %s\n", d->cgroup_use ? d->cgroup_use : "(none)");
+ printf(" lxc.cgroup.pattern: %s\n", d->cgroup_pattern);
+ printf(" cgroup: %s\n", d->container_cgroup ? d->container_cgroup : "(none)");
+ if (!d->hierarchies) {
+ printf(" No hierarchies found.\n");
+ return;
+ }
+ printf(" Hierarchies:\n");
+ for (i = 0; d->hierarchies[i]; i++) {
+ struct hierarchy *h = d->hierarchies[i];
+ int j;
+ printf(" %d: base_cgroup %s\n", i, h->base_cgroup);
+ printf(" mountpoint %s\n", h->mountpoint);
+ printf(" controllers:\n");
+ for (j = 0; h->controllers[j]; j++)
+ printf(" %d: %s\n", j, h->controllers[j]);
+ }
+}
+#else
+#define print_init_debuginfo(d)
+#endif
+
+/*
+ * At startup, parse_hierarchies finds all the info we need about
+ * cgroup mountpoints and current cgroups, and stores it in @d.
+ */
+static bool parse_hierarchies(struct cgfsng_handler_data *d)
+{
+ FILE *f;
+ char * line = NULL, *basecginfo;
+ char **klist = NULL, **nlist = NULL;
+ size_t len = 0;
+
+ if (geteuid())
+ basecginfo = read_file("/proc/self/cgroup");
+ else
+ basecginfo = read_file("/proc/1/cgroup");
+ if (!basecginfo)
+ return false;
+
+ if ((f = fopen("/proc/self/mountinfo", "r")) == NULL) {
+ ERROR("Failed opening /proc/self/mountinfo");
+ return false;
+ }
+
+ get_existing_subsystems(&klist, &nlist);
+#if EXTRADEBUG
+ printf("basecginfo is %s\n", basecginfo);
+ int k;
+ for (k = 0; klist[k]; k++)
+ printf("kernel subsystem %d: %s\n", k, klist[k]);
+ for (k = 0; nlist[k]; k++)
+ printf("named subsystem %d: %s\n", k, nlist[k]);
+#endif
+
+ /* we support simple cgroup mounts and lxcfs mounts */
+ while (getline(&line, &len, f) != -1) {
+ char **controller_list = NULL;
+ char *mountpoint, *base_cgroup;
+
+ if (!is_lxcfs(line) && !is_cgroupfs(line))
+ continue;
+
+ controller_list = get_controllers(klist, nlist, line);
+ if (!controller_list)
+ continue;
+
+ if (controller_list_is_dup(d->hierarchies, controller_list)) {
+ free(controller_list);
+ continue;
+ }
+
+ mountpoint = get_mountpoint(line);
+ if (!mountpoint) {
+ ERROR("Error reading mountinfo: bad line '%s'", line);
+ free_string_list(controller_list);
+ continue;
+ }
+
+ base_cgroup = get_current_cgroup(basecginfo, controller_list[0]);
+ if (!base_cgroup) {
+ ERROR("Failed to find current cgroup for controller '%s'", controller_list[0]);
+ free_string_list(controller_list);
+ free(mountpoint);
+ continue;
+ }
+ trim(base_cgroup);
+ prune_init_scope(base_cgroup);
+ if (!test_writeable(mountpoint, base_cgroup)) {
+ free_string_list(controller_list);
+ free(mountpoint);
+ free(base_cgroup);
+ continue;
+ }
+ add_controller(d, controller_list, mountpoint, base_cgroup);
+ }
+
+ free_string_list(klist);
+ free_string_list(nlist);
+
+ free(basecginfo);
+
+ fclose(f);
+ free(line);
+
+ print_init_debuginfo(d);
+
+ /* verify that all controllers in cgroup.use and all crucial
+ * controllers are accounted for
+ */
+ if (!all_controllers_found(d))
+ return false;
+
+ return true;
+}
+
+static void *cgfsng_init(const char *name)
+{
+ struct cgfsng_handler_data *d;
+ const char *cgroup_use, *cgroup_pattern;
+
+ d = must_alloc(sizeof(*d));
+ memset(d, 0, sizeof(*d));
+
+ d->name = must_copy_string(name);
+
+ errno = 0;
+ cgroup_use = lxc_global_config_value("lxc.cgroup.use");
+ if (!cgroup_use && errno != 0) { // lxc.cgroup.use can be NULL
+ SYSERROR("Error reading list of cgroups to use");
+ goto out_free;
+ }
+ d->cgroup_use = must_copy_string(cgroup_use);
+
+ cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
+ if (!cgroup_pattern) { // lxc.cgroup.pattern is only NULL on error
+ ERROR("Error getting cgroup pattern");
+ goto out_free;
+ }
+ d->cgroup_pattern = must_copy_string(cgroup_pattern);
+
+ if (!parse_hierarchies(d))
+ goto out_free;
+
+ print_init_debuginfo(d);
+
+ return d;
+
+out_free:
+ free_handler_data(d);
+ return NULL;
+}
+
+/*
+ * Concatenate all passed-in strings into one path. Do not fail. If any piece is
+ * not prefixed with '/', add a '/'.
+ */
+static char *must_make_path(const char *first, ...)
+{
+ va_list args;
+ char *cur, *dest;
+ size_t full_len = strlen(first);
+
+ dest = must_copy_string(first);
+
+ va_start(args, first);
+ while ((cur = va_arg(args, char *)) != NULL) {
+ full_len += strlen(cur);
+ if (cur[0] != '/')
+ full_len++;
+ dest = must_realloc(dest, full_len + 1);
+ if (cur[0] != '/')
+ strcat(dest, "/");
+ strcat(dest, cur);
+ }
+ va_end(args);
+
+ return dest;
+}
+
+static int cgroup_rmdir(char *dirname)
+{
+ struct dirent dirent, *direntp;
+ DIR *dir;
+ int r = 0;
+
+ dir = opendir(dirname);
+ if (!dir)
+ return -1;
+
+ while (!readdir_r(dir, &dirent, &direntp)) {
+ struct stat mystat;
+ char *pathname;
+
+ if (!direntp)
+ break;
+
+ if (!strcmp(direntp->d_name, ".") ||
+ !strcmp(direntp->d_name, ".."))
+ continue;
+
+ pathname = must_make_path(dirname, direntp->d_name, NULL);
+
+ if (lstat(pathname, &mystat)) {
+ if (!r)
+ WARN("failed to stat %s\n", pathname);
+ r = -1;
+ goto next;
+ }
+
+ if (!S_ISDIR(mystat.st_mode))
+ goto next;
+ if (cgroup_rmdir(pathname) < 0)
+ r = -1;
+next:
+ free(pathname);
+ }
+
+ if (rmdir(dirname) < 0) {
+ if (!r)
+ WARN("%s: failed to delete %s: %m", __func__, dirname);
+ r = -1;
+ }
+
+ if (closedir(dir) < 0) {
+ if (!r)
+ WARN("%s: failed to delete %s: %m", __func__, dirname);
+ r = -1;
+ }
+ return r;
+}
+
+static int rmdir_wrapper(void *data)
+{
+ char *path = data;
+
+ if (setresgid(0,0,0) < 0)
+ SYSERROR("Failed to setgid to 0");
+ if (setresuid(0,0,0) < 0)
+ SYSERROR("Failed to setuid to 0");
+ if (setgroups(0, NULL) < 0)
+ SYSERROR("Failed to clear groups");
+
+ return cgroup_rmdir(path);
+}
+
+void recursive_destroy(char *path, struct lxc_conf *conf)
+{
+ int r;
+ if (conf && !lxc_list_empty(&conf->id_map))
+ r = userns_exec_1(conf, rmdir_wrapper, path);
+ else
+ r = cgroup_rmdir(path);
+
+ if (r < 0)
+ ERROR("Error destroying %s\n", path);
+}
+
+static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
+{
+ struct cgfsng_handler_data *d = hdata;
+
+ if (!d)
+ return;
+
+ if (d->container_cgroup && d->hierarchies) {
+ int i;
+ for (i = 0; d->hierarchies[i]; i++) {
+ struct hierarchy *h = d->hierarchies[i];
+ if (!h->fullcgpath) {
+ recursive_destroy(h->fullcgpath, conf);
+ free(h->fullcgpath);
+ h->fullcgpath = NULL;
+ }
+ }
+ }
+
+ free_handler_data(d);
+}
+
+struct cgroup_ops *cgfsng_ops_init(void)
+{
+ /* TODO - when cgroup_mount is implemented, drop this check */
+ if (!file_exists("/proc/self/ns/cgroup"))
+ return NULL;
+ return &cgfsng_ops;
+}
+
+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
+{
+ char *fullpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
+ int ret;
+
+ ret = mkdir_p(fullpath, 0755);
+ h->fullcgpath = fullpath;
+ return ret == 0;
+}
+
+static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
+{
+ if (rmdir(h->fullcgpath) < 0)
+ SYSERROR("Failed to clean up cgroup %s from failed creation attempt", h->fullcgpath);
+ free(h->fullcgpath);
+ h->fullcgpath = NULL;
+}
+
+/*
+ * Try to create the same cgrou pin all hierarchies.
+ * Start with cgroup_pattern; next cgroup_pattern-1, -2, ..., -999
+ */
+static inline bool cgfsng_create(void *hdata)
+{
+ struct cgfsng_handler_data *d = hdata;
+ char *tmp, *cgname, *offset;
+ int i, idx = 0;
+ size_t len;
+
+ if (!d)
+ return false;
+ if (d->container_cgroup) {
+ WARN("cgfsng_create called a second time");
+ return false;
+ }
+
+ tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
+ if (!tmp) {
+ ERROR("Failed expanding cgroup name pattern");
+ return false;
+ }
+ len = strlen(tmp) + 5; // leave room for -NNN\0
+ cgname = must_alloc(len);
+ strcpy(cgname, tmp);
+ free(tmp);
+ offset = cgname + len - 5;
+
+again:
+ if (idx == 1000)
+ goto out_free;
+ if (idx)
+ snprintf(offset, 5, "-%d", idx);
+ for (i = 0; d->hierarchies[i]; i++) {
+ if (!create_path_for_hierarchy(d->hierarchies[i], cgname)) {
+ int j;
+ SYSERROR("Failed to create %s: %s", d->hierarchies[i]->fullcgpath, strerror(errno));
+ free(d->hierarchies[i]->fullcgpath);
+ d->hierarchies[i]->fullcgpath = NULL;
+ for (j = 0; j < i; j++)
+ remove_path_for_hierarchy(d->hierarchies[j], cgname);
+ idx++;
+ goto again;
+ }
+ }
+ /* Done */
+ d->container_cgroup = cgname;
+ return true;
+
+out_free:
+ free(cgname);
+ return false;
+}
+
+static const char *cgfsng_canonical_path(void *hdata)
+{
+ struct cgfsng_handler_data *d = hdata;
+
+ return d->container_cgroup;
+}
+
+static bool cgfsng_enter(void *hdata, pid_t pid)
+{
+ struct cgfsng_handler_data *d = hdata;
+ char pidstr[25];
+ int i, len;
+
+ len = snprintf(pidstr, 25, "%d", pid);
+ if (len < 0 || len > 25)
+ return false;
+
+ for (i = 0; d->hierarchies[i]; i++) {
+ char *fullpath = must_make_path(d->hierarchies[i]->fullcgpath,
+ "cgroup.procs", NULL);
+ if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
+ ERROR("Failed to enter %s\n", fullpath);
+ free(fullpath);
+ return false;
+ }
+ free(fullpath);
+ }
+
+ return true;
+}
+
+struct chown_data {
+ struct cgfsng_handler_data *d;
+ uid_t origuid; // target uid in parent namespace
+};
+
+static int chown_cgroup_wrapper(void *data)
+{
+ struct chown_data *arg = data;
+ struct cgfsng_handler_data *d = arg->d;
+ uid_t destuid;
+ int i;
+
+ if (setresgid(0,0,0) < 0)
+ SYSERROR("Failed to setgid to 0");
+ if (setresuid(0,0,0) < 0)
+ SYSERROR("Failed to setuid to 0");
+ if (setgroups(0, NULL) < 0)
+ SYSERROR("Failed to clear groups");
+
+ destuid = get_ns_uid(arg->origuid);
+
+ for (i = 0; d->hierarchies[i]; i++) {
+ char *fullpath = must_make_path(d->hierarchies[i]->fullcgpath, NULL);
+ if (chown(fullpath, destuid, 0) < 0) {
+ SYSERROR("Error chowning %s", fullpath);
+ free(fullpath);
+ return -1;
+ }
+ // TODO - do we need to chown tasks and cgroup.procs too?
+
+ free(fullpath);
+ }
+
+ return 0;
+}
+
+static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
+{
+ struct cgfsng_handler_data *d = hdata;
+ struct chown_data wrap;
+
+ if (!d)
+ return false;
+
+ if (lxc_list_empty(&conf->id_map))
+ return true;
+
+ wrap.d = d;
+ wrap.origuid = geteuid();
+
+ if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap) < 0) {
+ ERROR("Error requesting cgroup chown in new namespace");
+ return false;
+ }
+
+ return true;
+}
+
+static bool cgfsng_mount(void *hdata, const char *root, int type)
+{
+ if (cgns_supported())
+ return true;
+ // TODO - implement this. Not needed for cgroup namespaces
+ return false;
+}
+
+static int recursive_count_nrtasks(char *dirname)
+{
+ struct dirent dirent, *direntp;
+ DIR *dir;
+ int count = 0, ret;
+ char *path;
+
+ dir = opendir(dirname);
+ if (!dir)
+ return 0;
+
+ while (!readdir_r(dir, &dirent, &direntp)) {
+ struct stat mystat;
+
+ if (!direntp)
+ break;
+
+ if (!strcmp(direntp->d_name, ".") ||
+ !strcmp(direntp->d_name, ".."))
+ continue;
+
+ path = must_make_path(dirname, direntp->d_name, NULL);
+
+ if (lstat(path, &mystat))
+ goto next;
+
+ if (!S_ISDIR(mystat.st_mode))
+ goto next;
+
+ count += recursive_count_nrtasks(path);
+next:
+ free(path);
+ }
+
+ path = must_make_path(dirname, "cgroup.procs", NULL);
+ ret = lxc_count_file_lines(path);
+ if (ret != -1)
+ count += ret;
+ free(path);
+
+ (void) closedir(dir);
+
+ return count;
+}
+
+static int cgfsng_nrtasks(void *hdata) {
+ struct cgfsng_handler_data *d = hdata;
+ char *path;
+ int count;
+
+ if (!d || !d->container_cgroup || !d->hierarchies)
+ return -1;
+ path = must_make_path(d->hierarchies[0]->fullcgpath, NULL);
+ count = recursive_count_nrtasks(path);
+ free(path);
+ return count;
+}
+
+/* Only root needs to escape to the cgroup of its init */
+static bool cgfsng_escape(void *hdata)
+{
+ struct cgfsng_handler_data *d = hdata;
+ int i;
+
+ if (geteuid())
+ return true;
+
+ for (i = 0; d->hierarchies[i]; i++) {
+ char *fullpath = must_make_path(d->hierarchies[i]->mountpoint,
+ d->hierarchies[i]->base_cgroup,
+ "cgroup.procs", NULL);
+ if (lxc_write_to_file(fullpath, "0", 2, false) != 0) {
+ ERROR("Failed to enter %s\n", fullpath);
+ free(fullpath);
+ return false;
+ }
+ free(fullpath);
+ }
+
+ return true;
+}
+
+#define THAWED "THAWED"
+#define THAWED_LEN (strlen(THAWED))
+
+static bool cgfsng_unfreeze(void *hdata)
+{
+ struct cgfsng_handler_data *d = hdata;
+ char *fullpath;
+ struct hierarchy *h = get_hierarchy(d, "freezer");
+
+ if (!d || !h)
+ return false;
+ fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
+ if (lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false) != 0) {
+ free(fullpath);
+ return false;
+ }
+ free(fullpath);
+ return true;
+}
+
+static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
+{
+ struct cgfsng_handler_data *d = hdata;
+ struct hierarchy *h;
+ if (!d)
+ return NULL;
+
+ h = get_hierarchy(d, subsystem);
+ if (!h)
+ return NULL;
+
+ return h->fullcgpath;
+}
+
+static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
+{
+ struct cgfsng_handler_data *d;
+ char pidstr[25];
+ int i, len;
+
+ len = snprintf(pidstr, 25, "%d", pid);
+ if (len < 0 || len > 25)
+ return false;
+
+ d = cgfsng_init(name);
+ if (!d)
+ return false;
+
+ for (i = 0; d->hierarchies[i]; i++) {
+ char *path, *fullpath;
+ struct hierarchy *h = d->hierarchies[i];
+
+ path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
+ if (!path) // not running
+ continue;
+
+ fullpath = must_make_path(path, "cgroup.procs", NULL);
+ if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
+ SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
+ free(fullpath);
+ free(path);
+ free_handler_data(d);
+ return false;
+ }
+ free(path);
+ free(fullpath);
+ }
+
+ free_handler_data(d);
+ return true;
+}
+
+/*
+ * Called externally (i.e. from 'lxc-cgroup') to query cgroup limits.
+ * Here we don't have a cgroup_data set up, so we ask the running
+ * container through the commands API for the cgroup path
+ */
+static int cgfsng_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
+{
+ char *subsystem, *p, *path;
+ struct cgfsng_handler_data *d;
+ struct hierarchy *h;
+ int ret = -1;
+
+ subsystem = alloca(strlen(filename) + 1);
+ strcpy(subsystem, filename);
+ if ((p = strchr(subsystem, '.')) != NULL)
+ *p = '\0';
+
+ path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
+ if (!path) // not running
+ return -1;
+
+ d = cgfsng_init(name);
+ if (!d) {
+ free(path);
+ return false;
+ }
+
+ h = get_hierarchy(d, subsystem);
+ if (h) {
+ char *fullpath = must_make_path(path, filename, NULL);
+ ret = lxc_read_from_file(fullpath, value, len);
+ free(fullpath);
+ }
+
+ free_handler_data(d);
+ free(path);
+
+ return ret;
+}
+
+/*
+ * Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.
+ * Here we don't have a cgroup_data set up, so we ask the running
+ * container through the commands API for the cgroup path
+ */
+static int cgfsng_set(const char *filename, const char *value, const char *name, const char *lxcpath)
+{
+ char *subsystem, *p, *path;
+ struct cgfsng_handler_data *d;
+ struct hierarchy *h;
+ int ret = -1;
+
+ subsystem = alloca(strlen(filename) + 1);
+ strcpy(subsystem, filename);
+ if ((p = strchr(subsystem, '.')) != NULL)
+ *p = '\0';
+
+ path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
+ if (!path) // not running
+ return -1;
+
+ d = cgfsng_init(name);
+ if (!d) {
+ free(path);
+ return false;
+ }
+
+ h = get_hierarchy(d, subsystem);
+ if (h) {
+ char *fullpath = must_make_path(path, filename, NULL);
+ ret = lxc_write_to_file(fullpath, value, strlen(value), false);
+ free(fullpath);
+ }
+
+ free_handler_data(d);
+ free(path);
+
+ return ret;
+}
+
+/*
+ * Check whether a container already has a particular rule, as otherwise
+ * may end up with spurious permission errors.
+ */
+static bool cgroup_devices_has_allow_or_deny(struct cgfsng_handler_data *d,
+ char *v, bool for_allow, char *path)
+{
+ FILE *devices_list;
+ char *line = NULL;
+ size_t sz = 0;
+ bool ret = !for_allow;
+
+ /* if it's a deny rule and container has all devices, then it doesn't
+ * yet have the deny rule */
+ if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
+ return false;
+
+ devices_list = fopen_cloexec(path, "r");
+ if (!devices_list) {
+ free(path);
+ return false;
+ }
+
+ while (getline(&line, &sz, devices_list) != -1) {
+ size_t len = strlen(line);
+ if (len > 0 && line[len-1] == '\n')
+ line[len-1] = '\0';
+ if (strcmp(line, "a *:* rwm") == 0) {
+ /* if container has all access and we're adding allow rule,
+ * then already has it; if it has all access and we're
+ * adding a deny rule, then it does not. */
+ ret = for_allow;
+ goto out;
+ } else if (for_allow && strcmp(line, v) == 0) {
+ /* if the line is there verbatim and it is an
+ * allow rule, then it already has it */
+ ret = true;
+ goto out;
+ }
+ }
+
+out:
+ fclose(devices_list);
+ free(line);
+ return ret;
+}
+
+/*
+ * Called from setup_limits - here we have the container's cgroup_data because
+ * we created the cgroups
+ */
+static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfsng_handler_data *d)
+{
+ char *subsystem = NULL, *p;
+ int ret = -1;
+ struct hierarchy *h;
+
+ subsystem = alloca(strlen(filename) + 1);
+ strcpy(subsystem, filename);
+ if ((p = strchr(subsystem, '.')) != NULL)
+ *p = '\0';
+
+ h = get_hierarchy(d, subsystem);
+ if (h) {
+ char *fullpath = must_make_path(h->fullcgpath, filename, NULL);
+ ret = lxc_write_to_file(fullpath, value, strlen(value), false);
+ free(fullpath);
+ }
+ return ret;
+}
+
+static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
+ bool do_devices)
+{
+ struct cgfsng_handler_data *d = hdata;
+ struct lxc_list *iterator, *sorted_cgroup_settings, *next;
+ struct lxc_cgroup *cg;
+ struct hierarchy *h;
+ char *listpath = NULL;
+ bool ret = false;
+
+ if (lxc_list_empty(cgroup_settings))
+ return true;
+
+ sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
+ if (!sorted_cgroup_settings) {
+ return false;
+ }
+
+ if (do_devices) {
+ h = get_hierarchy(d, "devices");
+ if (!h) {
+ ERROR("No devices cgroup setup for %s\n", d->name);
+ return false;
+ }
+ listpath = must_make_path(h->fullcgpath, "devices.list", NULL);
+ }
+
+ lxc_list_for_each(iterator, sorted_cgroup_settings) {
+ cg = iterator->elem;
+
+ if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
+ if (strcmp(cg->subsystem, "devices.deny") == 0 &&
+ cgroup_devices_has_allow_or_deny(d, cg->value, false, listpath))
+ continue;
+ if (strcmp(cg->subsystem, "devices.allow") == 0 &&
+ cgroup_devices_has_allow_or_deny(d, cg->value, true, listpath))
+ continue;
+ if (lxc_cgroup_set_data(cg->subsystem, cg->value, d)) {
+ if (do_devices && (errno == EACCES || errno == EPERM)) {
+ WARN("Error setting %s to %s for %s",
+ cg->subsystem, cg->value, d->name);
+ continue;
+ }
+ SYSERROR("Error setting %s to %s for %s",
+ cg->subsystem, cg->value, d->name);
+ goto out;
+ }
+ }
+
+ DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
+ }
+
+ ret = true;
+ INFO("cgroup has been setup");
+out:
+ free(listpath);
+ lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
+ lxc_list_del(iterator);
+ free(iterator);
+ }
+ free(sorted_cgroup_settings);
+ return ret;
+}
+
+static struct cgroup_ops cgfsng_ops = {
+ .init = cgfsng_init,
+ .destroy = cgfsng_destroy,
+ .create = cgfsng_create,
+ .enter = cgfsng_enter,
+ .canonical_path = cgfsng_canonical_path,
+ .escape = cgfsng_escape,
+ .get_cgroup = cgfsng_get_cgroup,
+ .get = cgfsng_get,
+ .set = cgfsng_set,
+ .unfreeze = cgfsng_unfreeze,
+ .setup_limits = cgfsng_setup_limits,
+ .name = "cgroupfs-ng",
+ .attach = cgfsng_attach,
+ .chown = cgfsns_chown,
+ .mount_cgroup = cgfsng_mount,
+ .nrtasks = cgfsng_nrtasks,
+ .driver = CGFSNG,
+
+ /* unsupported */
+ .create_legacy = NULL,
+};
diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
index 7a35d03..c387b00 100644
--- a/src/lxc/cgmanager.c
+++ b/src/lxc/cgmanager.c
@@ -299,7 +299,7 @@ static bool lxc_cgmanager_create(const char *controller, const char *cgroup_path
* be in "/lxc/c1" rather than "/user/..../c1"
* called internally with connection already open
*/
-static bool cgm_escape(void)
+static bool cgm_escape(void *hdata)
{
bool ret = true, cgm_needs_disconnect = false;
pid_t me = getpid();
@@ -1436,7 +1436,7 @@ struct cgroup_ops *cgm_ops_init(void)
cgm_all_controllers_same = false;
// if root, try to escape to root cgroup
- if (geteuid() == 0 && !cgm_escape()) {
+ if (geteuid() == 0 && !cgm_escape(NULL)) {
free_subsystems();
return NULL;
}
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 5d67bd3..684a3c3 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -34,6 +34,7 @@ lxc_log_define(lxc_cgroup, lxc);
static struct cgroup_ops *ops = NULL;
extern struct cgroup_ops *cgfs_ops_init(void);
+extern struct cgroup_ops *cgfsng_ops_init(void);
extern struct cgroup_ops *cgm_ops_init(void);
__attribute__((constructor))
@@ -45,8 +46,10 @@ void cgroup_ops_init(void)
}
DEBUG("cgroup_init");
+ ops = cgfsng_ops_init();
#if HAVE_CGMANAGER
- ops = cgm_ops_init();
+ if (!ops)
+ ops = cgm_ops_init();
#endif
if (!ops)
ops = cgfs_ops_init();
@@ -109,10 +112,10 @@ const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem
return NULL;
}
-bool cgroup_escape(void)
+bool cgroup_escape(struct lxc_handler *handler)
{
if (ops)
- return ops->escape();
+ return ops->escape(handler->cgroup_data);
return false;
}
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 9919486..ff3651e 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -35,6 +35,7 @@ struct lxc_list;
typedef enum {
CGFS,
CGMANAGER,
+ CGFSNG,
} cgroup_driver_t;
struct cgroup_ops {
@@ -47,7 +48,7 @@ struct cgroup_ops {
bool (*create_legacy)(void *hdata, pid_t pid);
const char *(*get_cgroup)(void *hdata, const char *subsystem);
const char *(*canonical_path)(void *hdata);
- bool (*escape)(void);
+ bool (*escape)(void *hdata);
int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath);
int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
bool (*unfreeze)(void *hdata);
@@ -72,7 +73,7 @@ extern void cgroup_cleanup(struct lxc_handler *handler);
extern bool cgroup_create_legacy(struct lxc_handler *handler);
extern int cgroup_nrtasks(struct lxc_handler *handler);
extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem);
-extern bool cgroup_escape(void);
+extern bool cgroup_escape(struct lxc_handler *handler);
/*
* Currently, this call only makes sense for privileged containers.
diff --git a/src/lxc/criu.c b/src/lxc/criu.c
index 6ef4905..25e8d70 100644
--- a/src/lxc/criu.c
+++ b/src/lxc/criu.c
@@ -47,7 +47,7 @@
lxc_log_define(lxc_criu, lxc);
-void exec_criu(struct criu_opts *opts)
+void exec_criu(struct lxc_handler *handler, struct criu_opts *opts)
{
char **argv, log[PATH_MAX];
int static_args = 22, argc = 0, i, ret;
@@ -63,7 +63,7 @@ void exec_criu(struct criu_opts *opts)
* /actual/ root cgroup so that lxcfs thinks criu has enough rights to
* see all cgroups.
*/
- if (!cgroup_escape()) {
+ if (!cgroup_escape(handler)) {
ERROR("failed to escape cgroups");
return;
}
@@ -517,7 +517,7 @@ void do_restore(struct lxc_container *c, int pipe, char *directory, bool verbose
os.cgroup_path = cgroup_canonical_path(handler);
/* exec_criu() returning is an error */
- exec_criu(&os);
+ exec_criu(handler, &os);
umount(rootfs->mount);
rmdir(rootfs->mount);
goto out_fini_handler;
@@ -624,6 +624,16 @@ static bool do_dump(struct lxc_container *c, char *mode, char *directory,
if (pid == 0) {
struct criu_opts os;
+ struct lxc_handler *handler;
+
+ handler = lxc_init(c->name, c->lxc_conf, c->config_path);
+ if (!handler)
+ exit(1);
+
+ if (!cgroup_init(handler)) {
+ ERROR("failed initing cgroups");
+ exit(1);
+ }
os.action = mode;
os.directory = directory;
@@ -633,7 +643,7 @@ static bool do_dump(struct lxc_container *c, char *mode, char *directory,
os.predump_dir = predump_dir;
/* exec_criu() returning is an error */
- exec_criu(&os);
+ exec_criu(handler, &os);
exit(1);
} else {
int status;
diff --git a/src/lxc/criu.h b/src/lxc/criu.h
index e35f98a..75e6381 100644
--- a/src/lxc/criu.h
+++ b/src/lxc/criu.h
@@ -58,7 +58,7 @@ struct criu_opts {
const char *cgroup_path;
};
-void exec_criu(struct criu_opts *opts);
+void exec_criu(struct lxc_handler *handler, struct criu_opts *opts);
/* Check and make sure the container has a configuration that we know CRIU can
* dump. */
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 0bc7a20..6bee698 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1771,3 +1771,25 @@ int null_stdfds(void)
close(fd);
return ret;
}
+
+/*
+ * Return the number of lines in file @fn, or -1 on error
+ */
+int lxc_count_file_lines(const char *fn)
+{
+ FILE *f;
+ char *line = NULL;
+ size_t sz = 0;
+ int n = 0;
+
+ f = fopen_cloexec(fn, "r");
+ if (!f)
+ return -1;
+
+ while (getline(&line, &sz, f) != -1) {
+ n++;
+ }
+ free(line);
+ fclose(f);
+ return n;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 96ec45c..7d20a39 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -284,4 +284,5 @@ int safe_mount(const char *src, const char *dest, const char *fstype,
unsigned long flags, const void *data, const char *rootfs);
int mount_proc_if_needed(const char *rootfs);
int null_stdfds(void);
+int lxc_count_file_lines(const char *fn);
#endif /* __LXC_UTILS_H */
More information about the lxc-devel
mailing list