[lxc-devel] [PATCH 1/1] cgroups: rework to handle nested containers with multiple and partial mounts

Serge Hallyn serge.hallyn at ubuntu.com
Tue Aug 13 16:27:21 UTC 2013


Currently, if you create a container and use the mountcgruop hook,
you get the /lxc/c1/c1.real cgroup mounted to /.  If you then try
to start containers inside that container, lxc can get confused.
This patch addresses that, by accepting that the cgroup as found
in /proc/self/cgroup can be partially hidden by bind mounts.

In this patch:

Add optional 'lxc.cgroup.use' to /etc/lxc/lxc.conf to specify which
mounted cgroup filesystems lxc should use.  So far only the cgroup
creation respects this.

Keep separate cgroup information for each cgroup mountpoint.  So if
the caller is in devices cgroup /a but cpuset cgroup /b that should
now be ok.

Change how we decide whether to ignore failure to set devices cgroup
settings.  Actually look to see if our current cgroup already has the
settings.  If not, add them.

Finally, the real reason for this patch: in a nested container,
/proc/self/cgroup says nothing about where under /sys/fs/cgroup you
might find yourself.  Handle this by searching for our pid in tasks
files, and keep that info in the cgroup handler.

Also remove all strdupa from cgroup.c (not android-friendly).

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/cgroup.c   | 1087 +++++++++++++++++++++++++++++++++-------------------
 src/lxc/cgroup.h   |   31 +-
 src/lxc/commands.c |   34 +-
 src/lxc/commands.h |    7 +-
 src/lxc/conf.c     |   41 --
 src/lxc/conf.h     |    2 -
 src/lxc/freezer.c  |   17 +-
 src/lxc/lxc.h      |    6 +-
 src/lxc/lxcutmp.c  |    2 +-
 src/lxc/start.c    |   32 +-
 src/lxc/start.h    |    4 +-
 src/tests/cgpath.c |  120 +-----
 12 files changed, 778 insertions(+), 605 deletions(-)

diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index cd6cd1a..78083e8 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -29,6 +29,7 @@
 #include <string.h>
 #include <dirent.h>
 #include <fcntl.h>
+#include <ctype.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/param.h>
@@ -39,6 +40,8 @@
 #include "error.h"
 #include "config.h"
 #include "commands.h"
+#include "list.h"
+#include "conf.h"
 
 #include <lxc/log.h>
 #include <lxc/cgroup.h>
@@ -96,19 +99,18 @@ static char *mount_has_subsystem(const struct mntent *mntent)
 
 /*
  * Determine mountpoint for a cgroup subsystem.
- * @subsystem: cgroup subsystem (i.e. freezer).  If this is NULL, the first
- * cgroup mountpoint with any subsystems is used.
- * @mnt: a passed-in buffer of at least size MAXPATHLEN into which the path
+ * @dest: a passed-in buffer of at least size MAXPATHLEN into which the path
  * is copied.
+ * @subsystem: cgroup subsystem (i.e. freezer)
  *
- * Returns 0 on success, -1 on error.
+ * Returns true on success, false on error.
  */
-static int get_cgroup_mount(const char *subsystem, char *mnt)
+bool get_subsys_mount(char *dest, const char *subsystem)
 {
 	struct mntent mntent_r;
 	FILE *file = NULL;
-	int ret, err = -1;
-
+	int ret;
+	bool retv = false;
 	char buf[LARGE_MAXPATHLEN] = {0};
 
 	file = setmntent(MTAB, "r");
@@ -118,7 +120,7 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
 	}
 
 	while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
-		if (strcmp(mntent_r.mnt_type, "cgroup") != 0)
+		if (strcmp(mntent_r.mnt_type, "cgroup"))
 			continue;
 
 		if (subsystem) {
@@ -129,12 +131,11 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
 				continue;
 		}
 
-		ret = snprintf(mnt, MAXPATHLEN, "%s", mntent_r.mnt_dir);
+		ret = snprintf(dest, MAXPATHLEN, "%s", mntent_r.mnt_dir);
 		if (ret < 0 || ret >= MAXPATHLEN)
 			goto fail;
 
-		DEBUG("using cgroup mounted at '%s'", mnt);
-		err = 0;
+		retv = true;
 		goto out;
 	};
 
@@ -143,78 +144,36 @@ fail:
 	      subsystem ? subsystem : "(NULL)");
 out:
 	endmntent(file);
-	return err;
+	return retv;
 }
 
 /*
- * cgroup_path_get: Get the absolute path to a particular subsystem,
- * plus a passed-in (to be appended) relative cgpath for a container.
- *
- * @subsystem : subsystem of interest (e.g. "freezer")
- * @cgrelpath : a container's relative cgroup path (e.g. "lxc/c1")
- *
- * Returns absolute path on success, NULL on error. The caller must free()
- * the returned path.
- *
- * Note that @subsystem may be the name of an item (e.g. "freezer.state")
- * in which case the subsystem will be determined by taking the string up
- * to the first '.'
+ * is_in_cgroup: check whether pid is found in the passed-in cgroup tasks
+ * file.
+ * @path:  in full path to a cgroup tasks file
+ * Note that in most cases the file will simply not exist, which is ok - it
+ * just means that's not our cgroup.
  */
-char *cgroup_path_get(const char *subsystem, const char *cgrelpath)
+static bool is_in_cgroup(pid_t pid, char *path)
 {
-	int rc;
-
-	char *buf = NULL;
-	char *cgabspath = NULL;
-
-	buf = malloc(MAXPATHLEN * sizeof(char));
-	if (!buf) {
-		ERROR("malloc failed");
-		goto out1;
-	}
-
-	cgabspath = malloc(MAXPATHLEN * sizeof(char));
-	if (!cgabspath) {
-		ERROR("malloc failed");
-		goto out2;
-	}
+	int cmppid;
+	FILE *f = fopen(path, "r");
+	char *line = NULL;
+	size_t sz = 0;
 
-	/* lxc_cgroup_set passes a state object for the subsystem,
-	 * so trim it to just the subsystem part */
-	if (subsystem) {
-		rc = snprintf(cgabspath, MAXPATHLEN, "%s", subsystem);
-		if (rc < 0 || rc >= MAXPATHLEN) {
-			ERROR("subsystem name too long");
-			goto err3;
+	if (!f)
+		return false;
+	while (getline(&line, &sz, f) != -1) {
+		if (sscanf(line, "%d", &cmppid) == 1 && cmppid == pid) {
+			fclose(f);
+			free(line);
+			return true;
 		}
-		char *s = index(cgabspath, '.');
-		if (s)
-			*s = '\0';
-		DEBUG("%s: called for subsys %s name %s\n", __func__,
-		      subsystem, cgrelpath);
-	}
-	if (get_cgroup_mount(subsystem ? cgabspath : NULL, buf)) {
-		ERROR("cgroup is not mounted");
-		goto err3;
 	}
-
-	rc = snprintf(cgabspath, MAXPATHLEN, "%s/%s", buf, cgrelpath);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("name too long");
-		goto err3;
-	}
-
-	DEBUG("%s: returning %s for subsystem %s relpath %s", __func__,
-		cgabspath, subsystem, cgrelpath);
-	goto out2;
-
-err3:
-	free(cgabspath);
-	cgabspath = NULL;
-out2:
-	free(buf);
-out1:
-	return cgabspath;
+	fclose(f);
+	if (line)
+		free(line);
+	return false;
 }
 
 /*
@@ -235,16 +194,64 @@ out1:
 char *lxc_cgroup_path_get(const char *subsystem, const char *name,
 			  const char *lxcpath)
 {
-	char *cgabspath;
-	char *cgrelpath;
+	char *cgpath, *cgp, path[MAXPATHLEN], *pathp, *p;
+	pid_t initpid = lxc_cmd_get_init_pid(name, lxcpath);
+	int ret;
+
+	if (initpid < 0) {
+		ERROR("Error getting init pid for container %s:%s",
+			lxcpath, name);
+		return NULL;
+	}
 
-	cgrelpath = lxc_cmd_get_cgroup_path(name, lxcpath);
-	if (!cgrelpath)
+	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
+	if (!cgpath)
 		return NULL;
 
-	cgabspath = cgroup_path_get(subsystem, cgrelpath);
-	free(cgrelpath);
-	return cgabspath;
+	if (!get_subsys_mount(path, subsystem))
+		return NULL;
+
+	pathp = path + strlen(path);
+	/*
+	 * find a mntpt where i have the subsystem mounted, then find
+	 * a subset cgpath under that which has pid in it.
+	 *
+	 * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
+	 * then look for ourselves in:
+	 *    /x/y/z/a/b/c/d/tasks
+	 *    /x/y/z/b/c/d/tasks
+	 *    /x/y/z/c/d/tasks
+	 *    /x/y/z/d/tasks
+	 *    /x/y/z/tasks
+	 */
+	cgp = cgpath;
+	while (cgp[0]) {
+		ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "%s/tasks", cgp);
+		if (ret < 0 || ret >= MAXPATHLEN)
+			return NULL;
+		if (!is_in_cgroup(initpid, path)) {
+			// does not exist, try the next one
+			cgp = index(cgp+1, '/');
+			if (!cgp)
+				break;
+			continue;
+		}
+		break;
+	}
+	if (!cgp || !*cgp) {
+		// try just the path
+		ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "/tasks");
+		if (ret < 0 || ret >= MAXPATHLEN)
+			return NULL;
+		if (!is_in_cgroup(initpid, path)) {
+			return NULL;
+		}
+		return strdup("/");
+	}
+	// path still has 'tasks' on the end, drop it
+	if ((p = rindex(path, '/')) != NULL)
+		*p = '\0';
+	return strdup(path);
 }
 
 /*
@@ -277,6 +284,91 @@ static int do_cgroup_set(const char *path, const char *value)
 	return 0;
 }
 
+static bool cgroup_devices_has_deny(struct lxc_handler *h, char *v)
+{
+	char *cgabspath, path[MAXPATHLEN];
+	FILE *f;
+	char *line = NULL;
+	size_t len = 0;
+	bool ret = true;
+
+	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
+	// not sure they ever do, but they *could*
+	// right now, I'm assuming they do NOT
+	if (strcmp(v, "a") && strcmp(v, "a *:* rwm"))
+		return false;
+	cgabspath = cgroup_get_subsys_path(h, "devices");
+	if (!cgabspath)
+		return -1;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("pathname too long for devices.list");
+		return -1;
+	}
+
+	if (!(f = fopen(path, "r"))) {
+		SYSERROR("Error opening devices.list");
+		return -1;
+	}
+
+	while (getline(&line, &len, f) != -1) {
+		size_t len = strlen(line);
+		if (len > 0 && line[len-1] == '\n')
+			line[len-1] = '\0';
+		if (strcmp(line, "a *:* rwm") == 0) {
+			ret = false;
+			goto out;
+		}
+	}
+
+out:
+	fclose(f);
+	if (line)
+		free(line);
+	return ret;
+}
+
+static bool cgroup_devices_has_allow(struct lxc_handler *h, char *v)
+{
+	char *cgabspath, path[MAXPATHLEN];
+	bool ret = false;
+	FILE *f;
+	char *line = NULL;
+	size_t len = 0;
+
+	cgabspath = cgroup_get_subsys_path(h, "devices");
+	if (!cgabspath)
+		return -1;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("pathname too long to for devices.list");
+		return -1;
+	}
+
+	if (!(f = fopen(path, "r"))) {
+		SYSERROR("Error opening devices.list");
+		return -1;
+	}
+
+	while (getline(&line, &len, f) != -1) {
+		size_t len = strlen(line);
+		if (len > 0 && line[len-1] == '\n')
+			line[len-1] = '\0';
+		if (strcmp(line, v) == 0) {
+			ret = true;
+			goto out;
+		}
+	}
+
+out:
+	if (line)
+		free(line);
+	fclose(f);
+	return ret;
+}
+
 /*
  * lxc_cgroup_set_bypath: Write a value into a cgroup file
  *
@@ -286,28 +378,29 @@ static int do_cgroup_set(const char *path, const char *value)
  *
  * Returns 0 on success, < 0 on error.
  */
-int lxc_cgroup_set_bypath(const char *cgrelpath, const char *filename, const char *value)
+int lxc_cgroup_set_value(struct lxc_handler *handler, const char *filename,
+			const char *value)
 {
+	char *cgabspath, path[MAXPATHLEN], *p;
 	int ret;
-	char *cgabspath;
-	char path[MAXPATHLEN];
 
-	cgabspath = cgroup_path_get(filename, cgrelpath);
+	ret = snprintf(path, MAXPATHLEN, "%s", filename);
+	if (ret < 0 || ret >= MAXPATHLEN)
+		return -1;
+	if ((p = index(path, '.')) != NULL)
+		*p = '\0';
+	cgabspath = cgroup_get_subsys_path(handler, path);
 	if (!cgabspath)
 		return -1;
 
 	ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
 	if (ret < 0 || ret >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		ret = -1;
-		goto out;
+		ERROR("pathname too long to set cgroup value %s to %s",
+			filename, value);
+		return -1;
 	}
 
-	ret = do_cgroup_set(path, value);
-
-out:
-	free(cgabspath);
-	return ret;
+	return do_cgroup_set(path, value);
 }
 
 /*
@@ -326,8 +419,13 @@ int lxc_cgroup_set(const char *name, const char *filename, const char *value,
 	int ret;
 	char *cgabspath;
 	char path[MAXPATHLEN];
+	char *subsystem = alloca(strlen(filename)+1), *p;
+	strcpy(subsystem, filename);
+
+	if ((p = index(subsystem, '.')) != NULL)
+		*p = '\0';
 
-	cgabspath = lxc_cgroup_path_get(filename, name, lxcpath);
+	cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
 	if (!cgabspath)
 		return -1;
 
@@ -370,8 +468,14 @@ int lxc_cgroup_get(const char *name, const char *filename, char *value,
 	int fd, ret;
 	char *cgabspath;
 	char path[MAXPATHLEN];
+	char *subsystem = alloca(strlen(filename)+1), *p;
 
-	cgabspath = lxc_cgroup_path_get(filename, name, lxcpath);
+	strcpy(subsystem, filename);
+
+	if ((p = index(subsystem, '.')) != NULL)
+		*p = '\0';
+
+	cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
 	if (!cgabspath)
 		return -1;
 
@@ -410,29 +514,27 @@ out:
 	return ret;
 }
 
-int lxc_cgroup_nrtasks(const char *cgrelpath)
+int lxc_cgroup_nrtasks(struct lxc_handler *handler)
 {
-	char *cgabspath = NULL;
 	char path[MAXPATHLEN];
 	int pid, ret;
 	FILE *file;
 
-	cgabspath = cgroup_path_get(NULL, cgrelpath);
-	if (!cgabspath)
+	if (!handler->cgroup)
 		return -1;
 
-	ret = snprintf(path, MAXPATHLEN, "%s/tasks", cgabspath);
+	/* XXX Should we use a specific subsystem rather than the first one we
+	 * found (handler->cgroup->curcgroup)? */
+	ret = snprintf(path, MAXPATHLEN, "%s/tasks", handler->cgroup->curcgroup);
 	if (ret < 0 || ret >= MAXPATHLEN) {
 		ERROR("pathname too long");
-		ret = -1;
-		goto out;
+		return -1;
 	}
 
 	file = fopen(path, "r");
 	if (!file) {
 		SYSERROR("fopen '%s' failed", path);
-		ret = -1;
-		goto out;
+		return -1;
 	}
 
 	ret = 0;
@@ -440,28 +542,35 @@ int lxc_cgroup_nrtasks(const char *cgrelpath)
 		ret++;
 
 	fclose(file);
-
-out:
-	free(cgabspath);
 	return ret;
 }
 
-/*
- * If first creating the /sys/fs/cgroup/$subsys/lxc container, then
- * try to set clone_children to 1.  Some kernels don't support
- * clone_children, and cgroup maintainer wants to deprecate it.  So
- * XXX TODO we should instead after each cgroup mkdir (here and in
- * hooks/mountcgroup) check if cpuset is in the subsystems, and if so
- * manually copy over mems and cpus.
- */
-static void set_clone_children(const char *mntdir)
+static int in_subsys_list(const char *s, const char *list)
+{
+	char *token, *str, *saveptr = NULL;
+
+	if (!list || !s)
+		return 0;
+
+	str = alloca(strlen(list)+1);
+	strcpy(str, list);
+	for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) {
+		if (strcmp(s, token) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void set_clone_children(struct mntent *m)
 {
 	char path[MAXPATHLEN];
 	FILE *fout;
 	int ret;
 
-	ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", mntdir);
-	INFO("writing to %s\n", path);
+	if (!in_subsys_list("cpuset", m->mnt_opts))
+		return;
+	ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", m->mnt_dir);
 	if (ret < 0 || ret > MAXPATHLEN)
 		return;
 	fout = fopen(path, "w");
@@ -471,7 +580,59 @@ static void set_clone_children(const char *mntdir)
 	fclose(fout);
 }
 
-static char *get_all_cgroups(void)
+static bool have_visited(char *opts, char *visited, char *all_subsystems)
+{
+	char *str, *s = NULL, *token;
+
+	str = alloca(strlen(opts)+1);
+	strcpy(str, opts);
+	for (; (token = strtok_r(str, ",", &s)); str = NULL) {
+		if (!in_subsys_list(token, all_subsystems))
+			continue;
+		if (visited && in_subsys_list(token, visited))
+			return true;
+	}
+
+	return false;
+}
+
+static bool is_in_desclist(struct cgroup_desc *d, char *opts, char *all_subsystems)
+{
+	while (d) {
+		if (have_visited(opts, d->subsystems, all_subsystems))
+			return true;
+		d = d->next;
+	}
+	return false;
+}
+
+static char *record_visited(char *opts, char *all_subsystems)
+{
+	char *s = NULL, *token, *str;
+	int oldlen = 0, newlen, toklen;
+	char *visited = NULL;
+
+	str = alloca(strlen(opts)+1);
+	strcpy(str, opts);
+	for (; (token = strtok_r(str, ",", &s)); str = NULL) {
+		if (!in_subsys_list(token, all_subsystems))
+			continue;
+		toklen = strlen(token);
+		newlen = oldlen + toklen +  1; // ',' + token or token + '\0'
+		visited = realloc(visited, newlen);
+		if (!visited)
+			return (char *)-ENOMEM;
+		if (oldlen)
+			strcat(visited, ",");
+		else
+			*visited = '\0';
+		strcat(visited, token);
+	}
+
+	return visited;
+}
+ 
+static char *get_all_subsystems(void)
 {
 	FILE *f;
 	char *line = NULL, *ret = NULL;
@@ -518,112 +679,217 @@ out:
 	return ret;
 }
 
-static int in_cgroup_list(char *s, char *list)
+/*
+ * /etc/lxc/lxc.conf can contain lxc.cgroup.use = entries.
+ * If any of those are present, then lxc will ONLY consider
+ * cgroup filesystems mounted at one of the listed entries.
+ */
+static char *get_cgroup_uselist()
 {
-	char *token, *str, *saveptr = NULL;
-
-	if (!list || !s)
-		return 0;
+	FILE *f;
+	char *line = NULL, *ret = NULL;
+	size_t sz = 0, retsz = 0, newsz;
 
-	for (str = strdupa(list); (token = strtok_r(str, ",", &saveptr)); str = NULL) {
-		if (strcmp(s, token) == 0)
-			return 1;
+	if ((f = fopen(LXC_GLOBAL_CONF, "r")) == NULL)
+		return NULL;
+	while (getline(&line, &sz, f) != -1) {
+		char *p = line;
+		while (*p && isblank(*p))
+			p++;
+		if (strncmp(p, "lxc.cgroup.use", 14) != 0)
+			continue;
+		p = index(p, '=');
+		if (!p)
+			continue;
+		p++;
+		while (*p && isblank(*p))
+			p++;
+		if (strlen(p) < 1)
+			continue;
+		newsz = retsz + strlen(p);
+		if (retsz == 0)
+			newsz += 1;  // for trailing \0
+		// the last line in the file could lack \n
+		if (p[strlen(p)-1] != '\n')
+			newsz += 1;
+		ret = realloc(ret, newsz);
+		if (!ret) {
+			ERROR("Out of memory reading cgroup uselist");
+			fclose(f);
+			free(line);
+			return (char *)-ENOMEM;
+		}
+		if (retsz == 0)
+			strcpy(ret, p);
+		else
+			strcat(ret, p);
+		if (p[strlen(p)-1] != '\n')
+			ret[newsz-2] = '\0';
+		ret[newsz-1] = '\0';
+		retsz = newsz;
 	}
 
-	return 0;
+	if (line)
+		free(line);
+	return ret;
 }
 
-static int have_visited(char *opts, char *visited, char *allcgroups)
+static bool is_in_uselist(char *uselist, struct mntent *m)
 {
-	char *str, *s = NULL, *token;
-
-	for (str = strdupa(opts); (token = strtok_r(str, ",", &s)); str = NULL) {
-		if (!in_cgroup_list(token, allcgroups))
-			continue;
-		if (visited && in_cgroup_list(token, visited))
-			return 1;
+	char *p;
+	if (!uselist)
+		return true;
+	if (!*uselist)
+		return false;
+	while (*uselist) {
+		p = index(uselist, '\n');
+		if (strncmp(m->mnt_dir, uselist, p - uselist) == 0)
+			return true;
+		uselist = p+1;
 	}
-
-	return 0;
+	return false;
 }
 
-static int record_visited(char *opts, char **visitedp, char *allcgroups)
+static bool find_real_cgroup(struct cgroup_desc *d, char *path)
 {
-	char *s = NULL, *token, *str;
-	int oldlen, newlen, ret;
+	FILE *f;
+	char *line = NULL, *p, *p2;
+	int ret = 0;
+	size_t len;
 
-	for (str = strdupa(opts); (token = strtok_r(str, ",", &s)); str = NULL) {
-		if (!in_cgroup_list(token, allcgroups))
+	if ((f = fopen("/proc/self/cgroup", "r")) == NULL) {
+		SYSERROR("Error opening /proc/self/cgroups");
+		return false;
+	}
+
+	// If there is no subsystem, ignore the mount.  Note we may want
+	// to change this, so that unprivileged users can use a unbound
+	// cgroup mount to arrange their container tasks.
+	if (!d->subsystems) {
+		fclose(f);
+		return false;
+	}
+	while (getline(&line, &len, f) != -1) {
+		if (!(p = index(line, ':')))
 			continue;
-		if (*visitedp && in_cgroup_list(token, *visitedp))
+		if (!(p2 = index(++p, ':')))
 			continue;
-		oldlen = (*visitedp) ? strlen(*visitedp) : 0;
-		newlen = oldlen + strlen(token) + 2;
-		(*visitedp) = realloc(*visitedp, newlen);
-		if (!(*visitedp))
-			return -1;
-		ret = snprintf((*visitedp)+oldlen, newlen, ",%s", token);
-		if (ret < 0 || ret >= newlen)
-			return -1;
-	}
+		*p2 = '\0';
+		// in case of multiple mounts it may be more correct to
+		// insist all subsystems be the same
+		if (in_subsys_list(p, d->subsystems))
+			goto found;
+       }
 
-	return 0;
+	if (line)
+		free(line);
+	fclose(f);
+	return false;;
+
+found:
+	fclose(f);
+	ret = snprintf(path, MAXPATHLEN, "%s", p2+1);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		free(line);
+		return false;
+	}
+	free(line);
+	return true;
 }
 
+
 /*
- * Make sure the 'cgroup group' exists, so that we don't have to worry about
- * that later.
- *
- * @lxcgroup: the cgroup group, i.e. 'lxc' by default.
- *
- * See detailed comments at lxc_cgroup_path_create for more information.
- *
- * Returns 0 on success, -1 on error.
+ * for a given cgroup mount entry, and a to-be-created container,
+ * 1. Figure out full path of the cgroup we are currently in,
+ * 2. Find a new free cgroup which is $path / $lxc_name with an
+ *    optional '-$n' where n is an ever-increasing integer.
  */
-static int create_lxcgroups(const char *lxcgroup)
+static char *find_free_cgroup(struct cgroup_desc *d, const char *lxc_name)
 {
-	FILE *file = NULL;
-	struct mntent mntent_r;
-	int ret, retv = -1;
-	char path[MAXPATHLEN];
-
-	char buf[LARGE_MAXPATHLEN] = {0};
+	char tail[20], cgpath[MAXPATHLEN], *cgp, path[MAXPATHLEN];
+	int i = 0, ret;
+	size_t l;
 
-	file = setmntent(MTAB, "r");
-	if (!file) {
-		SYSERROR("failed to open %s", MTAB);
-		return -1;
+	if (!find_real_cgroup(d, cgpath)) {
+		ERROR("Failed to find current cgroup");
+		return NULL;
 	}
 
-	while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
-
-		if (strcmp(mntent_r.mnt_type, "cgroup"))
-			continue;
-		if (!mount_has_subsystem(&mntent_r))
-			continue;
-
-		/*
-		 * TODO - handle case where lxcgroup has subdirs?  (i.e. build/l1)
-		 * We probably only want to support that for /users/joe
-		 */
-		ret = snprintf(path, MAXPATHLEN, "%s/%s", mntent_r.mnt_dir, lxcgroup);
+	/*
+	 * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
+	 * then look for ourselves in:
+	 *    /x/y/z/a/b/c/d/tasks
+	 *    /x/y/z/b/c/d/tasks
+	 *    /x/y/z/c/d/tasks
+	 *    /x/y/z/d/tasks
+	 *    /x/y/z/tasks
+	 */
+	cgp = cgpath;
+	while (cgp[0]) {
+		ret = snprintf(path, MAXPATHLEN, "%s%s/tasks", d->mntpt, cgp);
 		if (ret < 0 || ret >= MAXPATHLEN)
-			goto fail;
-		if (access(path, F_OK)) {
-			set_clone_children(mntent_r.mnt_dir);
-			ret = mkdir(path, 0755);
-			if (ret == -1 && errno != EEXIST) {
-				SYSERROR("failed to create '%s' directory", path);
-				goto fail;
-			}
+			return NULL;
+		if (!is_in_cgroup(getpid(), path)) {
+			// does not exist, try the next one
+			cgp = index(cgp+1, '/');
+			if (!cgp)
+				break;
+			continue;
 		}
+		break;
+	}
+	if (!cgp || !*cgp) {
+		// try just the path
+		ret = snprintf(path, MAXPATHLEN, "%s/tasks", d->mntpt);
+		if (ret < 0 || ret >= MAXPATHLEN)
+			return NULL;
+		if (!is_in_cgroup(getpid(), path))
+			return NULL;
+	}
+	// found it
+	// path has '/tasks' at end, drop that
+	if (!(cgp = rindex(path, '/'))) {
+		ERROR("Got nonsensical path name %s\n", path);
+		return NULL;
+	}
+	*cgp = '\0';
 
+	if (strlen(path) + strlen(lxc_name) + 20 > MAXPATHLEN) {
+		ERROR("Error: cgroup path too long");
+		return NULL;
+	}
+	tail[0] = '\0';
+	while (1) {
+		struct stat sb;
+		int freebytes = MAXPATHLEN - (cgp - path);
+
+		if (i) {
+			ret = snprintf(tail, 20, "-%d", i);
+			if (ret < 0 || ret >= 20)
+				return NULL;
+		}
+		ret = snprintf(cgp, freebytes, "/%s%s", lxc_name, tail);
+		if (ret < 0 || ret >= freebytes)
+			return NULL;
+		if (stat(path, &sb) == -1)
+			break;
+		i++;
 	}
 
-	retv = 0;
-fail:
-	endmntent(file);
-	return retv;
+	l = strlen(cgpath);
+	ret = snprintf(cgpath + l, MAXPATHLEN - l, "/%s%s", lxc_name, tail);
+	if (ret < 0 || ret >= (MAXPATHLEN - l)) {
+		ERROR("Out of memory");
+		return NULL;
+	}
+	if ((d->realcgroup = strdup(cgpath)) == NULL) {
+		ERROR("Out of memory");
+		return NULL;
+	}
+	l = strlen(d->realcgroup);
+	if (l > 0 && d->realcgroup[l-1] == '\n')
+		d->realcgroup[l-1] = '\0';
+	return strdup(path);
 }
 
 /*
@@ -646,152 +912,163 @@ fail:
  * is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the
  * freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/.
  *
- * XXX This should probably be locked globally
- *
  * Races won't be determintal, you'll just end up with leftover unused cgroups
  */
-char *lxc_cgroup_path_create(const char *lxcgroup, const char *name)
+struct cgroup_desc *lxc_cgroup_path_create(const char *name)
 {
-	int i = 0, ret;
-	char *retpath, path[MAXPATHLEN];
-	char tail[12];
+	struct cgroup_desc *retdesc = NULL, *newdesc = NULL;
 	FILE *file = NULL;
 	struct mntent mntent_r;
-	char *allcgroups = get_all_cgroups();
-	char *visited = NULL;
-
 	char buf[LARGE_MAXPATHLEN] = {0};
+	char *all_subsystems = get_all_subsystems();
+	char *cgroup_uselist = get_cgroup_uselist();
 
-	if (!lxcgroup || strlen(lxcgroup) == 0 || strcmp(lxcgroup, "/") == 0)
-		lxcgroup = "lxc";
-	if (!allcgroups)
+	if (cgroup_uselist == (char *)-ENOMEM) {
+		if (all_subsystems)
+			free(all_subsystems);
+		return NULL;
+	}
+	if (!all_subsystems) {
+		ERROR("failed to get a list of all cgroup subsystems");
+		if (cgroup_uselist)
+			free(cgroup_uselist);
 		return NULL;
-
-	if (create_lxcgroups(lxcgroup) < 0)
-		goto err1;
-
-again:
-	if (visited) {
-		/* we're checking for a new name, so start over with all cgroup
-		 * mounts */
-		free(visited);
-		visited = NULL;
 	}
 	file = setmntent(MTAB, "r");
 	if (!file) {
 		SYSERROR("failed to open %s", MTAB);
-		goto err1;
+		free(all_subsystems);
+		if (cgroup_uselist)
+			free(cgroup_uselist);
+		return NULL;
 	}
 
-	if (i)
-		snprintf(tail, 12, "-%d", i);
-	else
-		*tail = '\0';
-
 	while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
 
 		if (strcmp(mntent_r.mnt_type, "cgroup"))
 			continue;
-		if (!mount_has_subsystem(&mntent_r))
+
+		if (cgroup_uselist && !is_in_uselist(cgroup_uselist, &mntent_r))
 			continue;
 
 		/* make sure we haven't checked this subsystem already */
-		if (have_visited(mntent_r.mnt_opts, visited, allcgroups))
+		if (is_in_desclist(retdesc, mntent_r.mnt_opts, all_subsystems))
 			continue;
-		if (record_visited(mntent_r.mnt_opts, &visited, allcgroups) < 0)
-			goto fail;
 
-		/* find unused mnt_dir + lxcgroup + name + -$i */
-		ret = snprintf(path, MAXPATHLEN, "%s/%s/%s%s", mntent_r.mnt_dir,
-			       lxcgroup, name, tail);
-		if (ret < 0 || ret >= MAXPATHLEN)
+		if (!(newdesc = malloc(sizeof(struct cgroup_desc)))) {
+			ERROR("Out of memory reading cgroups");
 			goto fail;
+		}
+		newdesc->subsystems = record_visited(mntent_r.mnt_opts, all_subsystems);
+		if (newdesc->subsystems == (char *)-ENOMEM) {
+			ERROR("Out of memory recording cgroup subsystems");
+			free(newdesc);
+			newdesc = NULL;
+			goto fail;
+		}
+		if (!newdesc->subsystems) {
+			free(newdesc);
+			newdesc = NULL;
+			continue;
+		}
+		newdesc->mntpt = strdup(mntent_r.mnt_dir);
+		newdesc->realcgroup = NULL;
+		newdesc->curcgroup = find_free_cgroup(newdesc, name);
+		if (!newdesc->mntpt || !newdesc->curcgroup) {
+			ERROR("Out of memory reading cgroups");
+			goto fail;
+		}
 
-		INFO("lxcgroup %s name %s tail %s, makes path .%s.",
-			lxcgroup, name, tail, path);
-
-		if (access(path, F_OK) == 0) goto next;
+		set_clone_children(&mntent_r);
 
-		if (mkdir(path, 0755)) {
-			ERROR("Error creating cgroup %s", path);
+		if (mkdir(newdesc->curcgroup, 0755)) {
+			ERROR("Error creating cgroup %s", newdesc->curcgroup);
 			goto fail;
 		}
-
+		newdesc->next = retdesc;
+		retdesc = newdesc;
 	}
 
 	endmntent(file);
-
-	// print out the cgpath part
-	ret = snprintf(path, MAXPATHLEN, "%s/%s%s", lxcgroup, name, tail);
-	if (ret < 0 || ret >= MAXPATHLEN) // can't happen
-		goto fail;
-
-	retpath = strdup(path);
-	free(allcgroups);
-	if (visited)
-		free(visited);
-
-	return retpath;
-
-next:
-	endmntent(file);
-	i++;
-	goto again;
+	free(all_subsystems);
+	if (cgroup_uselist)
+		free(cgroup_uselist);
+	return retdesc;
 
 fail:
 	endmntent(file);
-err1:
-	free(allcgroups);
-	if (visited)
-		free(visited);
+	free(all_subsystems);
+	if (cgroup_uselist)
+		free(cgroup_uselist);
+	if (newdesc) {
+		if (newdesc->mntpt)
+			free(newdesc->mntpt);
+		if (newdesc->subsystems)
+			free(newdesc->subsystems);
+		if (newdesc->curcgroup)
+			free(newdesc->curcgroup);
+		if (newdesc->realcgroup)
+			free(newdesc->realcgroup);
+		free(newdesc);
+	}
+	while (retdesc) {
+		struct cgroup_desc *t = retdesc;
+		retdesc = retdesc->next;
+		if (t->mntpt)
+			free(t->mntpt);
+		if (t->subsystems)
+			free(t->subsystems);
+		if (t->curcgroup)
+			free(t->curcgroup);
+		if (t->realcgroup)
+			free(t->realcgroup);
+		free(t);
+
+	}
 	return NULL;
 }
 
-int lxc_cgroup_enter(const char *cgpath, pid_t pid)
+static bool lxc_cgroup_enter_one(const char *dir, int pid)
 {
 	char path[MAXPATHLEN];
-	FILE *file = NULL, *fout;
-	struct mntent mntent_r;
-	int ret, retv = -1;
-	char buf[LARGE_MAXPATHLEN] = {0};
+	int ret;
+	FILE *fout;
 
-	file = setmntent(MTAB, "r");
-	if (!file) {
-		SYSERROR("failed to open %s", MTAB);
-		return -1;
+	ret = snprintf(path, MAXPATHLEN, "%s/tasks", dir);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("Error entering cgroup");
+		return false;
 	}
-
-	while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
-		if (strcmp(mntent_r.mnt_type, "cgroup"))
-			continue;
-		if (!mount_has_subsystem(&mntent_r))
-			continue;
-		ret = snprintf(path, MAXPATHLEN, "%s/%s/tasks",
-			       mntent_r.mnt_dir, cgpath);
-		if (ret < 0 || ret >= MAXPATHLEN) {
-			ERROR("Error entering cgroup");
-			goto out;
-		}
-		fout = fopen(path, "w");
-		if (!fout) {
-			SYSERROR("Error entering cgroup");
-			goto out;
-		}
-		if (fprintf(fout, "%d\n", (int)pid) < 0) {
-			ERROR("Error writing pid to %s", path);
-			fclose(fout);
-			goto out;
-		}
-		if (fclose(fout) < 0) {
-			SYSERROR("Error writing pid to %s", path);
-			goto out;
-		}
+	fout = fopen(path, "w");
+	if (!fout) {
+		SYSERROR("Error entering cgroup");
+		return false;
+	}
+	if (fprintf(fout, "%d\n", (int)pid) < 0) {
+		ERROR("Error writing pid to %s to enter cgroup", path);
+		fclose(fout);
+		return false;
+	}
+	if (fclose(fout) < 0) {
+		SYSERROR("Error writing pid to %s to enter cgroup", path);
+		return false;
 	}
-	retv = 0;
 
-out:
-	endmntent(file);
-	return retv;
+	return true;
+}
+
+int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid)
+{
+	while (cgroups) {
+		if (!cgroups->subsystems)
+			goto next;
+
+		if (!lxc_cgroup_enter_one(cgroups->curcgroup, pid))
+			return -1;
+next:
+		cgroups = cgroups->next;
+	}
+	return 0;
 }
 
 static int cgroup_rmdir(char *dirname)
@@ -815,7 +1092,7 @@ static int cgroup_rmdir(char *dirname)
 			break;
 
 		if (!strcmp(direntp->d_name, ".") ||
-		    !strcmp(direntp->d_name, ".."))
+				!strcmp(direntp->d_name, ".."))
 			continue;
 
 		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
@@ -837,85 +1114,87 @@ static int cgroup_rmdir(char *dirname)
 	return ret;
 }
 
-static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath)
-{
-	char cgname[MAXPATHLEN];
-	char *cgmnt = mntent->mnt_dir;
-	int rc;
-
-	rc = snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, cgpath);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("name too long");
-		return -1;
-	}
-	DEBUG("destroying %s\n", cgname);
-	if (cgroup_rmdir(cgname)) {
-		SYSERROR("failed to remove cgroup '%s'", cgname);
-		return -1;
-	}
-
-	DEBUG("'%s' unlinked", cgname);
-
-	return 0;
-}
-
 /*
  * for each mounted cgroup, destroy the cgroup for the container
  */
-int lxc_cgroup_destroy(const char *cgpath)
+void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups)
 {
-	struct mntent mntent_r;
-	FILE *file = NULL;
-	int err, retv  = 0;
-
-	char buf[LARGE_MAXPATHLEN] = {0};
-
-	file = setmntent(MTAB, "r");
-	if (!file) {
-		SYSERROR("failed to open %s", MTAB);
-		return -1;
+	while (cgroups) {
+		struct cgroup_desc *next = cgroups->next;
+		if (cgroup_rmdir(cgroups->curcgroup) < 0)
+			SYSERROR("Error removing cgroup directory %s", cgroups->curcgroup);
+		free(cgroups->mntpt);
+		free(cgroups->subsystems);
+		free(cgroups->curcgroup);
+		free(cgroups->realcgroup);
+		free(cgroups);
+		cgroups = next;
 	}
-
-	while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
-		if (strcmp(mntent_r.mnt_type, "cgroup"))
-			continue;
-		if (!mount_has_subsystem(&mntent_r))
-			continue;
-
-		err = lxc_one_cgroup_destroy(&mntent_r, cgpath);
-		if (err)  // keep trying to clean up the others
-			retv = -1;
-	}
-
-	endmntent(file);
-	return retv;
 }
 
 int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
 {
-	int ret;
+	FILE *f;
+	char *line = NULL, ret = -1;
+	size_t len = 0;
+	int first = 1;
 	char *dirpath;
 
-	dirpath = lxc_cmd_get_cgroup_path(name, lxcpath);
-	if (!dirpath) {
-		ERROR("Error getting cgroup for container %s: %s", lxcpath, name);
-		return -1;
+	/* read the list of subsystems from the kernel */
+	f = fopen("/proc/cgroups", "r");
+	if (!f)
+		return ret;
+
+	while (getline(&line, &len, f) != -1) {
+		char *c;
+
+		/* skip the first line */
+		if (first) {
+			first=0;
+			continue;
+		}
+
+		c = strchr(line, '\t');
+		if (!c)
+			continue;
+		*c = '\0';
+		dirpath = lxc_cgroup_path_get(line, name, lxcpath);
+		if (!dirpath)
+			continue;
+
+		INFO("joining pid %d to cgroup %s", pid, dirpath);
+		if (lxc_cgroup_enter_one(dirpath, pid)) {
+			ERROR("Failed joining %d to %s\n", pid, dirpath);
+			continue;
+		}
 	}
-	INFO("joining pid %d to cgroup %s", pid, dirpath);
 
-	ret = lxc_cgroup_enter(dirpath, pid);
-	free(dirpath);
+	if (line)
+		free(line);
+	fclose(f);
 	return ret;
 }
 
-bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
+bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d)
 {
 	char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN];
 	FILE *f;
 	int ret, junk;
-	size_t sz = 0, l1 = strlen(cgpath), l2;
+	size_t sz = 0, l1, l2;
 	char *end = index(subsystem, '.');
 	int len = end ? (end - subsystem) : strlen(subsystem);
+	const char *cgpath = NULL;
+
+	while (d) {
+		if (in_subsys_list("devices", d->subsystems)) {
+			cgpath = d->realcgroup;
+			l1 = strlen(cgpath);
+			break;
+		}
+		d = d->next;
+	}
+	if (!d)
+		return false;
 
 	ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid);
 	if (ret < 0 || ret >= MAXPATHLEN)
@@ -928,6 +1207,7 @@ bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
 		ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2);
 		if (ret != 3) {
 			fclose(f);
+			free(line);
 			return false;
 		}
 		len = end ? end - subsystem : strlen(subsystem);
@@ -938,55 +1218,70 @@ bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
 		l2 = strlen(v2+1);
 		if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
 			fclose(f);
+			free(line);
 			return true;
 		}
 	}
 	fclose(f);
+	if (line)
+		free(line);
 	return false;
 }
 
-/*
- * Return cgroup of current task.
- * This assumes that task is in the same cgroup for each controller.  This
- * may or may not *always* be a reasonable assumption - it generally is,
- * and handling or at least checking for this condition is a TODO.
- */
-int lxc_curcgroup(char *cgroup, int inlen)
+char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys)
 {
-	FILE *f;
-	char *line = NULL, *p, *p2;
-	int ret = 0;
-	size_t len;
+	struct cgroup_desc *d;
 
-	f = fopen("/proc/self/cgroup", "r");
-	if (!f)
-		return -1;
+	for (d = handler->cgroup; d; d = d->next) {
+		if (in_subsys_list(subsys, d->subsystems))
+			return d->realcgroup;
+	}
 
-	while (getline(&line, &len, f) != -1) {
-		if (strstr(line, ":freezer:") == NULL && strstr(line, ":devices:") == NULL)
-			continue;
-		p = rindex(line, ':');
-		if (!p)
-			continue;
-		p++;
-		len = strlen(p) + 1;
-		p2 = p + len - 2;
-		while (*p2 == '\n') { len--; *p2 = '\0'; p2--; }
-		if (!cgroup || inlen <= 0) {
-			ret = len;
-			break;
-		}
-		if (cgroup && len > inlen) {
-			ret = -1;
-			break;
+	return NULL;
+}
+
+static int _setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups,
+			  int devices)
+{
+	struct lxc_list *iterator;
+	struct lxc_cgroup *cg;
+	int ret = -1;
+
+	if (lxc_list_empty(cgroups))
+		return 0;
+
+	lxc_list_for_each(iterator, cgroups) {
+		cg = iterator->elem;
+
+		if (devices == !strncmp("devices", cg->subsystem, 7)) {
+			if (strcmp(cg->subsystem, "devices.deny") == 0 &&
+					cgroup_devices_has_deny(h, cg->value))
+				continue;
+			if (strcmp(cg->subsystem, "devices.allow") == 0 &&
+					cgroup_devices_has_allow(h, cg->value))
+				continue;
+			if (lxc_cgroup_set_value(h, cg->subsystem, cg->value)) {
+				ERROR("Error setting %s to %s for %s\n",
+				      cg->subsystem, cg->value, h->name);
+				goto out;
+			}
 		}
-		strncpy(cgroup, p, len);
-		ret = len;
-		cgroup[len-1] = '\0';
-		break;
+
+		DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
 	}
 
-	if (line)
-		free(line);
+	ret = 0;
+	INFO("cgroup has been setup");
+out:
 	return ret;
 }
+
+int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups)
+{
+	return _setup_cgroup(h, cgroups, 1);
+}
+
+int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups)
+{
+	return _setup_cgroup(h, cgroups, 0);
+}
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 77c44cd..c327fa3 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -24,15 +24,34 @@
 #define _cgroup_h
 #include <stdbool.h>
 
+/*
+ * cgroup_desc: describe a container's cgroup membership
+ */
+struct cgroup_desc {
+	char *mntpt; /* where this is mounted */
+	char *subsystems; /* comma-separated list of subsystems, or NULL */
+	char *curcgroup; /* task's current cgroup, full pathanme */
+	char *realcgroup; /* the cgroup as known in /proc/self/cgroup */
+	struct cgroup_desc *next;
+};
+
 struct lxc_handler;
-extern int lxc_cgroup_destroy(const char *cgpath);
+extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups);
 extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
 			      const char *lxcpath);
-extern int lxc_cgroup_nrtasks(const char *cgpath);
-extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
-extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
+extern int lxc_cgroup_nrtasks(struct lxc_handler *handler);
+struct cgroup_desc *lxc_cgroup_path_create(const char *name);
+extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid);
 extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
 extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
-extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
-extern int lxc_curcgroup(char *cgroup, int inlen);
+extern bool get_subsys_mount(char *dest, const char *subsystem);
+extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
+/*
+ * Called by commands.c by a container's monitor to find out the
+ * container's cgroup path in a specific subsystem
+ */
+extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
+struct lxc_list;
+extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups);
+extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups);
 #endif
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index b23eb98..f1f9fcc 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -341,6 +341,7 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
 	return lxc_cmd_rsp_send(fd, &rsp);
 }
 
+extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
 /*
  * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
  * particular subsystem. This is the cgroup path relative to the root
@@ -348,15 +349,21 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
  *
  * @name      : name of container to connect to
  * @lxcpath   : the lxcpath in which the container is running
+ * @subsystem : the subsystem being asked about
  *
  * Returns the path on success, NULL on failure. The caller must free() the
  * returned path.
  */
-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+	const char *subsystem)
 {
 	int ret, stopped = 0;
 	struct lxc_cmd_rr cmd = {
-		.req = { .cmd = LXC_CMD_GET_CGROUP },
+		.req = {
+			.cmd = LXC_CMD_GET_CGROUP,
+			.datalen = strlen(subsystem)+1,
+			.data = subsystem,
+		},
 	};
 
 	ret = lxc_cmd(name, &cmd, &stopped, lxcpath);
@@ -381,10 +388,17 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
 static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
 				       struct lxc_handler *handler)
 {
-	struct lxc_cmd_rsp rsp = {
-		.datalen = strlen(handler->cgroup) + 1,
-		.data = handler->cgroup,
-	};
+	struct lxc_cmd_rsp rsp;
+	char *path;
+
+	if (req->datalen < 1)
+		return -1;
+
+	path = cgroup_get_subsys_path(handler, req->data);
+	if (!path)
+		return -1;
+	rsp.datalen = strlen(path) + 1,
+	rsp.data = path;
 
 	return lxc_cmd_rsp_send(fd, &rsp);
 }
@@ -535,7 +549,13 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
 	memset(&rsp, 0, sizeof(rsp));
 	rsp.ret = kill(handler->pid, stopsignal);
 	if (!rsp.ret) {
-		ret = lxc_unfreeze_bypath(handler->cgroup);
+		char *path = cgroup_get_subsys_path(handler, "freezer");
+		if (!path) {
+			ERROR("container %s:%s is not in a freezer cgroup",
+				handler->lxcpath, handler->name);
+			return 0;
+		}
+		ret = lxc_unfreeze_bypath(path);
 		if (!ret)
 			return 0;
 
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
index 46806cb..04ed182 100644
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -69,7 +69,12 @@ struct lxc_cmd_console_rsp_data {
 extern int lxc_cmd_console_winch(const char *name, const char *lxcpath);
 extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
 			   const char *lxcpath);
-extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath);
+/*
+ * Get the 'real' cgroup path (as seen in /proc/self/cgroup) for a container
+ * for a particular subsystem
+ */
+extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+			const char *subsystem);
 extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
 extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
 extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath);
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 1bd8c14..aff6982 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -1487,47 +1487,6 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
 	return 0;
 }
 
-static int _setup_cgroup(const char *cgpath, struct lxc_list *cgroups,
-			  int devices)
-{
-	struct lxc_list *iterator;
-	struct lxc_cgroup *cg;
-	int ret = -1;
-
-	if (lxc_list_empty(cgroups))
-		return 0;
-
-	lxc_list_for_each(iterator, cgroups) {
-		cg = iterator->elem;
-
-		if (devices == !strncmp("devices", cg->subsystem, 7)) {
-			if (lxc_cgroup_set_bypath(cgpath, cg->subsystem,
-			    cg->value)) {
-				ERROR("Error setting %s to %s for %s\n",
-				      cg->subsystem, cg->value, cgpath);
-				goto out;
-			}
-		}
-
-		DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
-	}
-
-	ret = 0;
-	INFO("cgroup has been setup");
-out:
-	return ret;
-}
-
-int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups)
-{
-	return _setup_cgroup(cgpath, cgroups, 1);
-}
-
-int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
-{
-	return _setup_cgroup(cgpath, cgroups, 0);
-}
-
 static void parse_mntopt(char *opt, unsigned long *flags, char **data)
 {
 	struct mount_opt *mo;
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index d6f85c8..26e2f92 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -301,8 +301,6 @@ struct lxc_conf {
 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
 		  const char *lxcpath, char *argv[]);
 
-extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
-extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups);
 extern int detect_shared_rootfs(void);
 
 /*
diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c
index 37a07fd..f2e4f8e 100644
--- a/src/lxc/freezer.c
+++ b/src/lxc/freezer.c
@@ -145,14 +145,17 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
 
 int lxc_unfreeze_bypath(const char *cgrelpath)
 {
-	char *cgabspath;
-	int ret;
+	char cgabspath[MAXPATHLEN];
+	int len, ret;
 
-	cgabspath = cgroup_path_get("freezer", cgrelpath);
-	if (!cgabspath)
+	if (!get_subsys_mount(cgabspath, "freezer"))
+		return -1;
+	len = strlen(cgabspath);
+	ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath);
+	if (ret < 0 || ret >= MAXPATHLEN-len) {
+		ERROR("freezer path name too long");
 		return -1;
+	}
 
-	ret = do_unfreeze(cgabspath, 0, NULL, NULL);
-	free(cgabspath);
-	return ret;
+	return do_unfreeze(cgabspath, 0, NULL, NULL);
 }
diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
index 67e0d9e..b9eb52c 100644
--- a/src/lxc/lxc.h
+++ b/src/lxc/lxc.h
@@ -136,15 +136,17 @@ extern int lxc_unfreeze_bypath(const char *cgpath);
  */
 extern lxc_state_t lxc_state(const char *name, const char *lxcpath);
 
+struct lxc_handler;
 /*
  * Set a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
- * @cgpath    : the cgroup path of the container
+ * @d         : the cgroup descriptor for the container
  * @filename  : the cgroup attribute filename
  * @value     : the value to be set
  * Returns 0 on success, < 0 otherwise
  */
-extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value);
+extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename,
+				const char *value);
 
 /*
  * Set a specified value for a specified subsystem. The specified
diff --git a/src/lxc/lxcutmp.c b/src/lxc/lxcutmp.c
index fd261b5..1421a9c 100644
--- a/src/lxc/lxcutmp.c
+++ b/src/lxc/lxcutmp.c
@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
 {
 	int ntasks;
 
-	ntasks = lxc_cgroup_nrtasks(handler->cgroup);
+	ntasks = lxc_cgroup_nrtasks(handler);
 
 	if (ntasks < 0) {
 		ERROR("failed to get the number of tasks");
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 00020de..f552e35 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -374,8 +374,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
 	handler->conf->maincmd_fd = -1;
 	free(handler->name);
 	if (handler->cgroup) {
-		lxc_cgroup_destroy(handler->cgroup);
-		free(handler->cgroup);
+		lxc_cgroup_destroy_desc(handler->cgroup);
 		handler->cgroup = NULL;
 	}
 	free(handler);
@@ -594,12 +593,11 @@ int save_phys_nics(struct lxc_conf *conf)
 	return 0;
 }
 
-extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
+extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
 int lxc_spawn(struct lxc_handler *handler)
 {
-	int failed_before_rename = 0, len;
+	int failed_before_rename = 0;
 	const char *name = handler->name;
-	char *curcgroup = NULL;
 
 	if (lxc_sync_init(handler))
 		return -1;
@@ -661,18 +659,10 @@ int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
 		failed_before_rename = 1;
 
-	if ((len = lxc_curcgroup(NULL, 0)) > 1) {
-		curcgroup = alloca(len);
-		if (lxc_curcgroup(curcgroup, len) <= 1)
-			curcgroup = NULL;
-		FILE *f = fopen("/tmp/a", "a");
-		fprintf(f, "curcgroup is %s\n", curcgroup);
-		fclose(f);
-	}
-	if ((handler->cgroup = lxc_cgroup_path_create(curcgroup, name)) == NULL)
+	if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL)
 		goto out_delete_net;
 
-	if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) {
+	if (setup_cgroup(handler, &handler->conf->cgroup)) {
 		ERROR("failed to setup the cgroups for '%s'", name);
 		goto out_delete_net;
 	}
@@ -707,15 +697,9 @@ int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
 		goto out_delete_net;
 
-	if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) {
-		/* an unfortunate special case: startup hooks may have already
-		 * setup the cgroup.  If a setting fails, and this is the devices
-		 * subsystem, *and* we are already in a subset of the cgroup,
-		 * then ignore the failure */
-		if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) {
-			ERROR("failed to setup the devices cgroup for '%s'", name);
-			goto out_delete_net;
-		}
+	if (setup_cgroup_devices(handler, &handler->conf->cgroup)) {
+		ERROR("failed to setup the devices cgroup for '%s'", name);
+		goto out_delete_net;
 	}
 
 	/* Tell the child to complete its initialization and wait for
diff --git a/src/lxc/start.h b/src/lxc/start.h
index ee011ea..8f3d580 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -37,6 +37,8 @@ struct lxc_operations {
 	int (*post_start)(struct lxc_handler *, void *);
 };
 
+struct cgroup_desc;
+
 struct lxc_handler {
 	pid_t pid;
 	char *name;
@@ -53,7 +55,7 @@ struct lxc_handler {
 #endif
 	int pinfd;
 	const char *lxcpath;
-	char *cgroup;
+	struct cgroup_desc *cgroup;
 };
 
 extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
diff --git a/src/tests/cgpath.c b/src/tests/cgpath.c
index 944769c..6761af4 100644
--- a/src/tests/cgpath.c
+++ b/src/tests/cgpath.c
@@ -37,102 +37,6 @@
 } while (0)
 
 /*
- * test_same_name: Create the same named container within a group
- *
- * @group : name of the container group or NULL for default "lxc"
- * @name  : name of the container
- *
- * Note, lxc will append a -<nr> to duplicate container names. This is what
- * is tested here.
- *
- * Returns 0 on success, < 0 on failure
- */
-static int test_same_name(const char *group, const char *name)
-{
-	int ret = -1;
-	char *cgrelpath1;
-	char *cgrelpath2;
-	char  relpath[PATH_MAX+1];
-
-	sprintf(relpath, "%s/%s-1", group ? group : "lxc", name);
-
-	cgrelpath1 = lxc_cgroup_path_create(group, name);
-	if (!cgrelpath1) {
-		TSTERR("lxc_cgroup_path_create returned NULL");
-		goto err1;
-	}
-	cgrelpath2 = lxc_cgroup_path_create(group, name);
-	if (!cgrelpath2) {
-		TSTERR("lxc_cgroup_path_create returned NULL");
-		goto err2;
-	}
-	if (strcmp(cgrelpath2, relpath)) {
-		TSTERR("unexpected name for duplicate %s", cgrelpath2);
-		goto err3;
-	}
-
-	ret = 0;
-err3:
-	lxc_cgroup_destroy(cgrelpath2);
-	free(cgrelpath2);
-err2:
-	lxc_cgroup_destroy(cgrelpath1);
-	free(cgrelpath1);
-err1:
-	return ret;
-}
-
-/*
- * test_basic: Test cgroup functions that don't require a running container
- *
- * @group : name of the container group or NULL for default "lxc"
- * @name  : name of the container
- *
- * Returns 0 on success, < 0 on failure
- */
-static int test_basic(const char *group, const char *name)
-{
-	int ret = -1;
-	char *cgabspath;
-	char *cgrelpath;
-	char  relpath[PATH_MAX+1];
-
-	sprintf(relpath, "%s/%s", group ? group : "lxc", name);
-
-	cgrelpath = lxc_cgroup_path_create(group, name);
-	if (!cgrelpath) {
-		TSTERR("lxc_cgroup_path_create returned NULL");
-		goto err1;
-	}
-	if (!strstr(cgrelpath, relpath)) {
-		TSTERR("lxc_cgroup_path_create %s not in %s", relpath, cgrelpath);
-		goto err2;
-	}
-
-	cgabspath = cgroup_path_get("freezer", cgrelpath);
-	if (!cgabspath) {
-		TSTERR("cgroup_path_get returned NULL");
-		goto err2;
-	}
-	if (!strstr(cgabspath, relpath)) {
-		TSTERR("cgroup_path_get %s not in %s", relpath, cgabspath);
-		goto err3;
-	}
-
-	ret = lxc_cgroup_destroy(cgrelpath);
-	if (ret < 0) {
-		TSTERR("lxc_cgroup_destroy failed");
-		goto err3;
-	}
-err3:
-	free(cgabspath);
-err2:
-	free(cgrelpath);
-err1:
-	return ret;
-}
-
-/*
  * test_running_container: test cgroup functions against a running container
  *
  * @group : name of the container group or NULL for default "lxc"
@@ -141,7 +45,7 @@ err1:
 static int test_running_container(const char *lxcpath,
 				  const char *group, const char *name)
 {
-	int nrtasks, ret = -1;
+	int ret = -1;
 	struct lxc_container *c = NULL;
 	char *cgrelpath;
 	char *cgabspath;
@@ -160,7 +64,7 @@ static int test_running_container(const char *lxcpath,
 		goto err2;
 	}
 
-	cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path);
+	cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path, "freezer");
 	if (!cgrelpath) {
 		TSTERR("lxc_cmd_get_cgroup_path returned NULL");
 		goto err2;
@@ -179,7 +83,7 @@ static int test_running_container(const char *lxcpath,
 	}
 	strcpy(value_save, value);
 
-	ret = lxc_cgroup_set_bypath(cgrelpath, "memory.swappiness", "100");
+	ret = lxc_cgroup_set(c->name, "memory.swappiness", "100", c->config_path);
 	if (ret < 0) {
 		TSTERR("lxc_cgroup_set_bypath failed");
 		goto err3;
@@ -213,12 +117,6 @@ static int test_running_container(const char *lxcpath,
 		goto err3;
 	}
 
-	nrtasks = lxc_cgroup_nrtasks(cgrelpath);
-	if (nrtasks < 1) {
-		TSTERR("failed getting nrtasks");
-		goto err3;
-	}
-
 	cgabspath = lxc_cgroup_path_get("freezer", c->name, c->config_path);
 	if (!cgabspath) {
 		TSTERR("lxc_cgroup_path_get returned NULL");
@@ -312,18 +210,6 @@ int main()
 		exit(0);
 	}
 
-	if (test_basic(NULL, MYNAME) < 0)
-		goto out;
-	if (test_basic("ab", MYNAME) < 0)
-		goto out;
-	printf("Basic cgroup path tests...Passed\n");
-
-	if (test_same_name(NULL, MYNAME) < 0)
-		goto out;
-	if (test_same_name("ab", MYNAME) < 0)
-		goto out;
-	printf("Same name tests...Passed\n");
-
 	#if TEST_ALREADY_RUNNING_CT
 
 	/*
-- 
1.8.1.2





More information about the lxc-devel mailing list