[lxc-devel] [PATCH 6/6] cgroup: improve support for multiple lxcpaths (v3)

Serge Hallyn serge.hallyn at ubuntu.com
Mon Mar 4 20:43:28 UTC 2013


Add a monitor command to get the cgroup for a running container.  This
allows container r1 started from /var/lib/lxc and container r1 started
from /home/ubuntu/lxcbase to pick unique cgroup directories (which
will be /sys/fs/cgroup/$subsys/lxc/r1 and .../r1-1), and all the lxc-*
tools to get that path over the monitor at lxcpath.

Rework the cgroup code.  Before, if /sys/fs/cgroup/$subsys/lxc/r1
already existed, it would be moved to 'deadXXXXX', and a new r1 created.
Instead, if r1 exists, use r1-1, r1-2, etc.

I ended up removing both the use of cgroup.clone_children and support
for ns cgroup.  Presumably we'll want to put support for ns cgroup
back in for older kernels.  Instead of guessing whether or not we
have clone_children support, just always explicitly do the only thing
that feature buys us - set cpuset.{cpus,mems} for newly created cgroups.

Note that upstream kernel is working toward strict hierarchical
limit enforcements, which will be good for us.

NOTE - I am changing the lxc_answer struct size.  This means that
upgrades to this version while containers are running will result
in lxc_* commands on pre-running containers will fail.

Changelog: (v3)
   implement cgroup attach
   fix a subtle bug arising when we lxc_get_cgpath() returned
     STOPPED rather than -1 (STOPPED is 0, and 0 meant success).
   Rename some functions and add detailed comments above most.
   Drop all my lxc_attach changes in favor of those by Christian
     Seiler (which are mostly the same, but improved).

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/attach.c       |   1 -
 src/lxc/cgroup.c       | 941 ++++++++++++++++++++++++-------------------------
 src/lxc/cgroup.h       |  18 +-
 src/lxc/commands.c     |  11 +
 src/lxc/commands.h     |   3 +
 src/lxc/conf.c         |   7 +-
 src/lxc/conf.h         |   2 +-
 src/lxc/freezer.c      |  35 +-
 src/lxc/lxc.h          |  28 +-
 src/lxc/lxc_attach.c   |   2 +-
 src/lxc/lxc_cgroup.c   |   4 +-
 src/lxc/lxc_unshare.c  |  10 -
 src/lxc/lxccontainer.c |   4 +-
 src/lxc/lxcutmp.c      |   2 +-
 src/lxc/start.c        |  54 ++-
 src/lxc/start.h        |   2 +-
 src/lxc/state.c        |   7 +-
 src/lxc/stop.c         |   3 +-
 src/tests/Makefile.am  |   4 +-
 src/tests/cgpath.c     | 164 +++++++++
 src/tests/lxcpath.c    |   2 +-
 21 files changed, 774 insertions(+), 530 deletions(-)
 create mode 100644 src/tests/cgpath.c

diff --git a/src/lxc/attach.c b/src/lxc/attach.c
index af3d7a0..e0a40bd 100644
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -42,7 +42,6 @@
 #include "log.h"
 #include "attach.h"
 #include "caps.h"
-#include "cgroup.h"
 #include "config.h"
 #include "apparmor.h"
 
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 6630d6c..4d7de02 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -38,6 +38,7 @@
 
 #include "error.h"
 #include "config.h"
+#include "commands.h"
 
 #include <lxc/log.h>
 #include <lxc/cgroup.h>
@@ -53,11 +54,6 @@ lxc_log_define(lxc_cgroup, lxc);
 
 #define MTAB "/proc/mounts"
 
-enum {
-	CGROUP_NS_CGROUP = 1,
-	CGROUP_CLONE_CHILDREN,
-};
-
 /* Check if a mount is a cgroup hierarchy for any subsystem.
  * Return the first subsystem found (or NULL if none).
  */
@@ -93,22 +89,27 @@ static char *mount_has_subsystem(const struct mntent *mntent)
 
 /*
  * get_init_cgroup: get the cgroup init is in.
- *  dsg: preallocated buffer to put the output in
- *  subsystem: the exact cgroup subsystem to look up
- *  mntent: a mntent (from getmntent) whose mntopts contains the
- *          subsystem to look up.
+ * @subsystem: the exact cgroup subsystem to look up (I.e. "freezer")
+ * @mntent: a mntent (from getmntent) whose mntopts contains the subsystem to
+ * look up.
+ * @dsg: preallocated buffer of at least size MAXPATHLEN in which the path will
+ * be copied.
+ * @prependslash: if 1, the path will have a '/' prepended for easy of use by
+ * the caller.
  *
  * subsystem and mntent can both be NULL, in which case we return
  * the first entry in /proc/1/cgroup.
  *
- * Returns a pointer to the answer, which may be "".
+ * Returns a pointer to the answer (which is just the passed-in @dsg), which
+ * may be "".
  */
 static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
-			     char *dsg)
+			     char *dsg, int prependslash)
 {
 	FILE *f;
 	char *c, *c2;
 	char line[MAXPATHLEN];
+	int ret;
 
 	*dsg = '\0';
 	f = fopen("/proc/1/cgroup", "r");
@@ -134,10 +135,18 @@ static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
 good:
 		DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n",
 			subsystem, c2);
-		strncpy(dsg, c2, MAXPATHLEN);
-		c = &dsg[strlen(dsg)-1];
+		ret = snprintf(dsg, MAXPATHLEN, "%s%s", prependslash ? "/" : "", c2);
+		if (ret < 0 || ret >= MAXPATHLEN) {
+			WARN("init cgroup path name was too long.");
+			goto found;
+		}
+		if (ret < 1)
+			goto found;
+
+		c = &dsg[ret-1];
 		if (*c == '\n')
 			*c = '\0';
+
 		goto found;
 	}
 
@@ -146,27 +155,22 @@ found:
 	return dsg;
 }
 
-static int get_cgroup_flags(struct mntent *mntent)
-{
-	int flags = 0;
-
-
-	if (hasmntopt(mntent, "ns"))
-		flags |= CGROUP_NS_CGROUP;
-
-	if (hasmntopt(mntent, "clone_children"))
-		flags |= CGROUP_CLONE_CHILDREN;
-
-	DEBUG("cgroup %s has flags 0x%x", mntent->mnt_dir, flags);
-	return flags;
-}
-
+/*
+ * Determine mountpoint for a cgroup subsystem, plus the cgroup path in that
+ * subsytem of the container init.
+ * @subsystem: cgroup subsystem (i.e. freezer).  If this is NULL, the first
+ * cgroup mountpoint with any subsystems is used.
+ * @mnt: a passed-in buffer of at least size MAXPATHLEN into which the path
+ * is copied.
+ *
+ * Returns 0 on success, -1 on error.
+ */
 static int get_cgroup_mount(const char *subsystem, char *mnt)
 {
 	struct mntent *mntent;
-	char initcgroup[MAXPATHLEN];
+	char initcgroup[MAXPATHLEN], *init;
 	FILE *file = NULL;
-	int ret, flags, err = -1;
+	int ret, err = -1;
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -181,16 +185,14 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
 		if (subsystem) {
 			if (!hasmntopt(mntent, subsystem))
 				continue;
-		}
-		else {
+		} else {
 			if (!mount_has_subsystem(mntent))
 				continue;
 		}
 
-		flags = get_cgroup_flags(mntent);
-		ret = snprintf(mnt, MAXPATHLEN, "%s%s%s", mntent->mnt_dir,
-			       get_init_cgroup(subsystem, NULL, initcgroup),
-		               (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
+		init = get_init_cgroup(NULL, mntent, initcgroup, 1);
+		ret = snprintf(mnt, MAXPATHLEN, "%s%s", mntent->mnt_dir,
+				init);
 		if (ret < 0 || ret >= MAXPATHLEN)
 			goto fail;
 
@@ -207,180 +209,370 @@ out:
 	return err;
 }
 
-int lxc_ns_is_mounted(void)
+/*
+ * cgroup_path_get: Calculate the full path for a particular subsystem, plus
+ * a passed-in (to be appended) relative cgpath for a container.
+ * @path: a char** into which a pointer to the answer is copied
+ * @subsystem: subsystem of interest (i.e. freezer).
+ * @cgpath: a container's (relative) cgroup path, i.e. "/lxc/c1".
+ *
+ * Returns 0 on success, -1 on error.
+ *
+ * The answer is written in a static char[MAXPATHLEN] in this function and
+ * should not be freed.
+ */
+extern int cgroup_path_get(char **path, const char *subsystem, const char *cgpath)
 {
 	static char        buf[MAXPATHLEN];
+	static char        retbuf[MAXPATHLEN];
+	int rc;
 
-	return (get_cgroup_mount("ns", buf) == 0);
+	/* lxc_cgroup_set passes a state object for the subsystem,
+	 * so trim it to just the subsystem part */
+	if (subsystem) {
+		rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
+		if (rc < 0 || rc >= MAXPATHLEN) {
+			ERROR("subsystem name too long");
+			return -1;
+		}
+		char *s = index(retbuf, '.');
+		if (s)
+			*s = '\0';
+		DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, cgpath);
+	}
+	if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
+		ERROR("cgroup is not mounted");
+		return -1;
+	}
+
+	rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, cgpath);
+	if (rc < 0 || rc >= MAXPATHLEN) {
+		ERROR("name too long");
+		return -1;
+	}
+
+	DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
+
+	*path = retbuf;
+	return 0;
 }
 
-static int cgroup_rename_nsgroup(const char *mnt, const char *name, pid_t pid)
+/*
+ * Calculate a container's cgroup path for a particular subsystem.  This
+ * is the cgroup path relative to the root of the cgroup filesystem.
+ * @path: A char ** into which we copy the char* containing the answer
+ * @subsystem: the cgroup subsystem of interest (i.e. freezer)
+ * @name: container name
+ * @lxcpath: the lxcpath in which the container is running.
+ *
+ * Returns 0 on success, -1 on error.
+ *
+ * Note that the char* copied into *path is a static char[MAXPATHLEN] in
+ * commands.c:receive_answer().  It should not be freed.
+ */
+extern int lxc_get_cgpath(const char **path, const char *subsystem, const char *name, const char *lxcpath)
 {
-	char oldname[MAXPATHLEN];
-	char newname[MAXPATHLEN];
-	int ret;
+	struct lxc_command command = {
+		.request = { .type = LXC_COMMAND_CGROUP },
+	};
+
+	int ret, stopped = 0;
 
-	ret = snprintf(oldname, MAXPATHLEN, "%s/%d", mnt, pid);
-	if (ret >= MAXPATHLEN)
+	ret = lxc_command(name, &command, &stopped, lxcpath);
+	if (ret < 0) {
+		if (!stopped)
+			ERROR("failed to send command");
 		return -1;
+	}
 
-	ret = snprintf(newname, MAXPATHLEN, "%s/%s", mnt, name);
-	if (ret >= MAXPATHLEN)
+	if (!ret) {
+		WARN("'%s' has stopped before sending its state", name);
 		return -1;
+	}
 
-	if (rename(oldname, newname)) {
-		SYSERROR("failed to rename cgroup %s->%s", oldname, newname);
+	if (command.answer.ret < 0 || command.answer.pathlen < 0) {
+		ERROR("failed to get state for '%s': %s",
+			name, strerror(-command.answer.ret));
 		return -1;
 	}
 
-	DEBUG("'%s' renamed to '%s'", oldname, newname);
+	*path = command.answer.path;
 
 	return 0;
 }
 
-static int cgroup_enable_clone_children(const char *path)
+/*
+ * lxc_cgroup_path_get: determine full pathname for a cgroup
+ * file for a specific container.
+ * @path: char ** used to return the answer.  The char * will point
+ * into the static char* retuf from cgroup_path_get() (so no need
+ * to free it).
+ * @subsystem: cgroup subsystem (i.e. "freezer") for which to
+ * return an answer.  If NULL, then the first cgroup entry in
+ * mtab will be used.
+ *
+ * This is the exported function, which determines cgpath from the
+ * monitor running in lxcpath.
+ *
+ * Returns 0 on success, < 0 on error.
+ */
+int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name, const char *lxcpath)
 {
-	FILE *f;
-	int ret = 0;
+	const char *cgpath;
 
-	f = fopen(path, "w");
-	if (!f) {
-		SYSERROR("failed to open '%s'", path);
+	if (lxc_get_cgpath(&cgpath, subsystem, name, lxcpath) < 0)
 		return -1;
-	}
 
-	if (fprintf(f, "1") < 1) {
-		ERROR("failed to write flag to '%s'", path);
-		ret = -1;
+	return cgroup_path_get(path, subsystem, cgpath);
+}
+
+/*
+ * small helper which simply write a value into a (cgroup) file
+ */
+static int do_cgroup_set(const char *path, const char *value)
+{
+	int fd, ret;
+
+	if ((fd = open(path, O_WRONLY)) < 0) {
+		SYSERROR("open %s : %s", path, strerror(errno));
+		return -1;
 	}
 
-	fclose(f);
+	if ((ret = write(fd, value, strlen(value))) < 0) {
+		close(fd);
+		SYSERROR("write %s : %s", path, strerror(errno));
+		return ret;
+	}
 
-	return ret;
+	if ((ret = close(fd)) < 0) {
+		SYSERROR("close %s : %s", path, strerror(errno));
+		return ret;
+	}
+	return 0;
 }
 
-static int lxc_one_cgroup_finish_attach(int fd, pid_t pid)
+/*
+ * small helper to write a value into a file in a particular directory.
+ * @cgpath: the directory in which to find the file
+ * @filename: the file (under cgpath) to which to write
+ * @value: what to write
+ *
+ * Returns 0 on success, < 0 on error.
+ */
+int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value)
 {
-       char buf[32];
-       int ret;
+	int ret;
+	char *dirpath;
+	char path[MAXPATHLEN];
 
-       snprintf(buf, 32, "%ld", (long)pid);
+	ret = cgroup_path_get(&dirpath, filename, cgpath);
+	if (ret)
+		return -1;
 
-       ret = write(fd, buf, strlen(buf));
-       if (ret <= 0) {
-               SYSERROR("failed to write pid '%ld' to fd '%d'", (long)pid, fd);
-               ret = -1;
-       } else {
-               ret = 0;
-       }
+	ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("pathname too long");
+		return -1;
+	}
 
-       close(fd);
-       return ret;
+	return do_cgroup_set(path, value);
 }
 
-static int lxc_one_cgroup_dispose_attach(int fd)
+/*
+ * set a cgroup value for a container
+ *
+ * @name: name of the container
+ * @filename: the cgroup file (i.e. freezer.state) whose value to change
+ * @value: the value to write to the file
+ * @lxcpath: the lxcpath under which the container is running.
+ *
+ * Returns 0 on success, < 0 on error.
+ */
+
+int lxc_cgroup_set(const char *name, const char *filename, const char *value,
+		   const char *lxcpath)
 {
-       close(fd);
-       return 0;
+	int ret;
+	char *dirpath;
+	char path[MAXPATHLEN];
+
+	ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath);
+	if (ret)
+		return -1;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("pathname too long");
+		return -1;
+	}
+
+	return do_cgroup_set(path, value);
 }
 
-static int lxc_one_cgroup_prepare_attach(const char *name,
-					 struct mntent *mntent)
+/*
+ * Get value of a cgroup setting for a container.
+ *
+ * @name: name of the container
+ * @filename: the cgroup file to read (i.e. 'freezer.state')
+ * @value: a preallocated char* into which to copy the answer
+ * @len: the length of pre-allocated @value
+ * @lxcpath: the lxcpath in which the container is running (i.e.
+ * /var/lib/lxc)
+ *
+ * Returns < 0 on error, or the number of bytes read.
+ *
+ * If you pass in NULL value or 0 len, then you are asking for the size of the
+ * file.
+ *
+ * Note that we can't get the file size quickly through stat or lseek.
+ * Therefore if you pass in len > 0 but less than the file size, your only
+ * indication will be that the return value will be equal to the passed-in ret.
+ * We will not return the actual full file size.
+ */
+int lxc_cgroup_get(const char *name, const char *filename, char *value,
+		   size_t len, const char *lxcpath)
 {
-	int fd;
-	char tasks[MAXPATHLEN], initcgroup[MAXPATHLEN];
-	char *cgmnt = mntent->mnt_dir;
-	int flags;
+	int fd, ret = -1;
+	char *dirpath;
+	char path[MAXPATHLEN];
 	int rc;
 
-	flags = get_cgroup_flags(mntent);
+	ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath);
+	if (ret)
+		return -1;
 
-	rc = snprintf(tasks, MAXPATHLEN, "%s%s%s/%s/tasks", cgmnt,
-	         get_init_cgroup(NULL, mntent, initcgroup),
-	         (flags & CGROUP_NS_CGROUP) ? "" : "/lxc",
-	         name);
+	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
 	if (rc < 0 || rc >= MAXPATHLEN) {
 		ERROR("pathname too long");
 		return -1;
 	}
 
-	fd = open(tasks, O_WRONLY);
+	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		SYSERROR("failed to open '%s'", tasks);
+		ERROR("open %s : %s", path, strerror(errno));
 		return -1;
 	}
 
-	return fd;
-}
-
-static int lxc_one_cgroup_attach(const char *name, struct mntent *mntent, pid_t pid)
-{
-	int fd;
-
-	fd = lxc_one_cgroup_prepare_attach(name, mntent);
-	if (fd < 0) {
-		return -1;
+	if (!len || !value) {
+		char buf[100];
+		int count = 0;
+		while ((ret = read(fd, buf, 100)) > 0)
+			count += ret;
+		if (ret >= 0)
+			ret = count;
+	} else {
+		memset(value, 0, len);
+		ret = read(fd, value, len);
 	}
 
-	return lxc_one_cgroup_finish_attach(fd, pid);
+	if (ret < 0)
+		ERROR("read %s : %s", path, strerror(errno));
+
+	close(fd);
+	return ret;
 }
 
-int lxc_cgroup_dispose_attach(void *data)
+int lxc_cgroup_nrtasks(const char *cgpath)
 {
-	int *fds = data;
-	int ret, err;
+	char *dpath;
+	char path[MAXPATHLEN];
+	int pid, ret, count = 0;
+	FILE *file;
+	int rc;
 
-	if (!fds) {
-		return 0;
-	}
+	ret = cgroup_path_get(&dpath, NULL, cgpath);
+	if (ret)
+		return -1;
 
-	ret = 0;
+	rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
+	if (rc < 0 || rc >= MAXPATHLEN) {
+		ERROR("pathname too long");
+		return -1;
+	}
 
-	for (; *fds >= 0; fds++) {
-		err = lxc_one_cgroup_dispose_attach(*fds);
-		if (err) {
-			ret = err;
-		}
+	file = fopen(path, "r");
+	if (!file) {
+		SYSERROR("fopen '%s' failed", path);
+		return -1;
 	}
 
-	free(data);
+	while (fscanf(file, "%d", &pid) != EOF)
+		count++;
 
-	return ret;
+	fclose(file);
+
+	return count;
 }
 
-int lxc_cgroup_finish_attach(void *data, pid_t pid)
+/*
+ * Set of helper functions to make sure that, when we create a new
+ * cpuset cgroup, its cpus and mems files inherit the values in the
+ * parent cgroup
+ */
+static long get_value(const char *dir, const char *file)
 {
-	int *fds = data;
-	int err;
+	FILE *f;
+	char path[MAXPATHLEN];
+	int ret, retv;
 
-	if (!fds) {
+	retv = snprintf(path, MAXPATHLEN, "%s/%s", dir, file);
+	if (retv < 0 || retv >= MAXPATHLEN)
 		return 0;
-	}
+	f = fopen(path, "r");
+	ret = fscanf(f, "%d", &retv);
+	fclose(f);
+	if (ret != 1)
+		return 0;
+	return retv;
+}
 
-	for (; *fds >= 0; fds++) {
-		err = lxc_one_cgroup_finish_attach(*fds, pid);
-		if (err) {
-			/* get rid of the rest of them */
-			lxc_cgroup_dispose_attach(data);
-			return -1;
-		}
-		*fds = -1;
-	}
+static void set_value(const char *dir, const char *file, long v)
+{
+	FILE *f;
+	char path[MAXPATHLEN];
+	int retv;
 
-	free(data);
+	retv = snprintf(path, MAXPATHLEN, "%s/%s", dir, file);
+	if (retv < 0 || retv >= MAXPATHLEN)
+		return;
+	f = fopen(path, "w");
+	fprintf(f, "%ld\n", v);
+	fclose(f);
+}
 
-	return 0;
+static void setup_cpuset(const char *path)
+{
+	/* copy parent values for mems_allowed and cpus_allowed */
+	char *parentpath = strdup(path);
+	char *p;
+	long v;
+	if ((p = rindex(parentpath, '/')) == NULL)
+		goto out;
+	v = get_value(parentpath, "cpuset.mems");
+	set_value(path, "cpuset.mems", v);
+	v = get_value(parentpath, "cpuset.cpus");
+	set_value(path, "cpuset.cpus", v);
+	
+out:
+	free(parentpath);
 }
 
-int lxc_cgroup_prepare_attach(const char *name, void **data)
+/*
+ * Make sure the 'cgroup group' exists, so that we don't have to worry about
+ * that later.
+ *
+ * @lxcgroup: the cgroup group, i.e. 'lxc' by default.
+ *
+ * See detailed comments at lxc_cgroup_path_create for more information.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int create_lxcgroups(const char *lxcgroup)
 {
-	struct mntent *mntent;
 	FILE *file = NULL;
-	int err = -1;
-	int found = 0;
-	int *fds;
-	int i;
-	static const int MAXFDS = 256;
+	struct mntent *mntent;
+	int ret, retv = -1;
+	char path[MAXPATHLEN], initpath[MAXPATHLEN];
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -388,205 +580,145 @@ int lxc_cgroup_prepare_attach(const char *name, void **data)
 		return -1;
 	}
 
-	/* create a large enough buffer for all practical
-	 * use cases
-	 */
-	fds = malloc(sizeof(int) * MAXFDS);
-	if (!fds) {
-		err = -1;
-		goto out;
-	}
-	for (i = 0; i < MAXFDS; i++) {
-		fds[i] = -1;
-	}
-
-	err = 0;
-	i = 0;
 	while ((mntent = getmntent(file))) {
-		if (i >= MAXFDS - 1) {
-			ERROR("too many cgroups to attach to, aborting");
-			lxc_cgroup_dispose_attach(fds);
-			errno = ENOMEM;
-			err = -1;
-			goto out;
-		}
-
-		DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
+		char *init = get_init_cgroup(NULL, mntent, initpath, 1);
 
 		if (strcmp(mntent->mnt_type, "cgroup"))
 			continue;
 		if (!mount_has_subsystem(mntent))
 			continue;
 
-		INFO("[%d] found cgroup mounted at '%s',opts='%s'",
-		     ++found, mntent->mnt_dir, mntent->mnt_opts);
-
-		fds[i] = lxc_one_cgroup_prepare_attach(name, mntent);
-		if (fds[i] < 0) {
-			err = fds[i];
-			lxc_cgroup_dispose_attach(fds);
-			goto out;
+		/* 
+		 * TODO - handle case where lxcgroup has subdirs?  (i.e. build/l1)
+		 * May not be worthwhile - remember cgroup depth has perf penalties
+		 * */
+		ret = snprintf(path, MAXPATHLEN, "%s%s/%s",
+			       mntent->mnt_dir, init, lxcgroup ? lxcgroup : "lxc");
+		if (ret < 0 || ret >= MAXPATHLEN)
+			goto fail;
+		if (access(path, F_OK)) {
+			ret = mkdir(path, 0755);
+			if (ret == -1 && errno != EEXIST) {
+				SYSERROR("failed to create '%s' directory", path);
+				goto fail;
+			}
+		} else if (hasmntopt(mntent, "cpuset")) {
+			setup_cpuset(path);
 		}
-		i++;
-	};
-
-	if (!found)
-		ERROR("No cgroup mounted on the system");
-
-	*data = fds;
 
-out:
-	endmntent(file);
-	return err;
-}
-
-/*
- * for each mounted cgroup, attach a pid to the cgroup for the container
- */
-int lxc_cgroup_attach(const char *name, pid_t pid)
-{
-	void *data = NULL;
-	int ret;
-
-	ret = lxc_cgroup_prepare_attach(name, &data);
-	if (ret < 0) {
-		return ret;
 	}
 
-	return lxc_cgroup_finish_attach(data, pid);
+	retv = 0;
+fail:
+	endmntent(file);
+	return retv;
 }
 
 /*
- * rename cgname, which is under cgparent, to a new name starting
- * with 'cgparent/dead'.  That way cgname can be reused.  Return
- * 0 on success, -1 on failure.
+ * For a new container, find a cgroup path which is unique in all cgroup mounts.
+ * I.e. if r1 is already running, then /lxc/r1-1 may be used.
+ *
+ * @lxcgroup: the cgroup 'group' the contaienr should run in.  By default, this
+ * is just 'lxc'.  Admins may wish to group some containers into other groups,
+ * i.e. 'build', to take advantage of cgroup hierarchy to simplify group
+ * administration.  Also, unprivileged users who are placed into a cgroup by
+ * libcgroup_pam will be using that cgroup rather than the system-wide 'lxc'
+ * group.
+ * @name: the name of the container
+ *
+ * The chosen cgpath is returned as a strdup'd string.  The caller will have to
+ * free that eventually, however the lxc monitor will keep that string so as to
+ * return it in response to a LXC_COMMAND_CGROUP query.
+ *
+ * Note the path is relative to cgroup mounts plus the caller's init task's
+ * cgroup.  I.e. if init is in cgroup /init and the freezer subsystem is at
+ * /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the freezer
+ * cgroup's full path will be /sys/fs/cgroup/freezer/init/lxc/r1/.  Note also
+ * that this should cleanly account for init being in different cgroups for
+ * different subsystems.
+ *
+ * XXX This should probably be locked globally
+ * 
+ * Races won't be determintal, you'll just end up with leftover unused cgroups
  */
-int try_to_move_cgname(char *cgparent, char *cgname)
+char *lxc_cgroup_path_create(const char *lxcgroup, const char *name)
 {
-	char *newdir;
-
-	/* tempnam problems don't matter here - cgroupfs will prevent
-	 * duplicates if we race, and we'll just fail at that (unlikely)
-	 * point
-	 */
-
-	newdir = tempnam(cgparent, "dead");
-	if (!newdir)
-		return -1;
-	if (rename(cgname, newdir))
-		return -1;
-	WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
-		cgname, newdir);
+	int i = 0, ret;
+	char *retpath, path[MAXPATHLEN], initpath[MAXPATHLEN], *init;
+	char tail[12];
+	FILE *file = NULL;
+	struct mntent *mntent;
 
-	return 0;
-}
+	if (create_lxcgroups(lxcgroup) < 0)
+		return NULL;
 
-/*
- * create a cgroup for the container in a particular subsystem.
- */
-static int lxc_one_cgroup_create(const char *name,
-				 struct mntent *mntent, pid_t pid)
-{
-	char cginit[MAXPATHLEN], cgname[MAXPATHLEN], cgparent[MAXPATHLEN];
-	char clonechild[MAXPATHLEN];
-	char initcgroup[MAXPATHLEN];
-	int flags, ret;
-
-	/* cgparent is the parent dir, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */
-	/* (remember get_init_cgroup() returns a path starting with '/') */
-	/* cgname is the full name, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc/name */
-	ret = snprintf(cginit, MAXPATHLEN, "%s%s", mntent->mnt_dir,
-		get_init_cgroup(NULL, mntent, initcgroup));
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for init's cgroup (%d)\n", ret);
-		return -1;
+again:
+	file = setmntent(MTAB, "r");
+	if (!file) {
+		SYSERROR("failed to open %s", MTAB);
+		return NULL;
 	}
 
-	flags = get_cgroup_flags(mntent);
+	if (i)
+		snprintf(tail, 12, "-%d", i);
+	else
+		*tail = '\0';
 
-	ret = snprintf(cgparent, MAXPATHLEN, "%s%s", cginit,
-		       (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret);
-		return -1;
-	}
-	ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name);
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for cgroup (%d)\n", ret);
-		return -1;
-	}
-
-	/* Do we have the deprecated ns_cgroup subsystem? */
-	if (flags & CGROUP_NS_CGROUP) {
-		WARN("using deprecated ns_cgroup");
-		return cgroup_rename_nsgroup(cginit, name, pid);
-	}
+	while ((mntent = getmntent(file))) {
 
-	ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
-		       cginit);
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for clone_children (%d)\n", ret);
-		return -1;
-	}
+		if (strcmp(mntent->mnt_type, "cgroup"))
+			continue;
+		if (!mount_has_subsystem(mntent))
+			continue;
 
-	/* we check if the kernel has clone_children, at this point if there
-	 * no clone_children neither ns_cgroup, that means the cgroup is mounted
-	 * without the ns_cgroup and it has not the compatibility flag
-	 */
-	if (access(clonechild, F_OK)) {
-		ERROR("no ns_cgroup option specified");
-		return -1;
-	}
+		init = get_init_cgroup(NULL, mntent, initpath, 1);
+		/* find unused mnt_dir + init_cgroup + lxcgroup + name + -$i */
+		ret = snprintf(path, MAXPATHLEN, "%s%s/%s/%s%s",
+			       mntent->mnt_dir, init,
+			       lxcgroup ? lxcgroup : "lxc", name, tail);
+		if (ret < 0 || ret >= MAXPATHLEN)
+			goto fail;
 
-	/* enable the clone_children flag of the cgroup */
-	if (cgroup_enable_clone_children(clonechild)) {
-		SYSERROR("failed to enable 'clone_children flag");
-		return -1;
-	}
+		if (access(path, F_OK) == 0) goto next;
 
-	/* if cgparent does not exist, create it */
-	if (access(cgparent, F_OK)) {
-		ret = mkdir(cgparent, 0755);
-		if (ret == -1 && errno != EEXIST) {
-			SYSERROR("failed to create '%s' directory", cgparent);
-			return -1;
+		if (mkdir(path, 0755)) {
+			ERROR("Error creating cgroups");
+			goto fail;
 		}
-	}
 
-	/*
-	 * There is a previous cgroup.  Try to delete it.  If that fails
-	 * (i.e. it is not empty) try to move it out of the way.
-	 */
-	if (!access(cgname, F_OK) && rmdir(cgname)) {
-		if (try_to_move_cgname(cgparent, cgname)) {
-			SYSERROR("failed to remove previous cgroup '%s'", cgname);
-			ERROR("##");
-			ERROR("# The container might be already running!");
-			ERROR("##");
-			return -1;
+		if (hasmntopt(mntent, "cpuset")) {
+			setup_cpuset(path);
 		}
 	}
 
-	/* Let's create the cgroup */
-	if (mkdir(cgname, 0755)) {
-		SYSERROR("failed to create '%s' directory", cgname);
-		return -1;
-	}
+	endmntent(file);
 
-	INFO("created cgroup '%s'", cgname);
+	// print out the cgpath part
+	ret = snprintf(path, MAXPATHLEN, "%s/%s%s",
+		       lxcgroup ? lxcgroup : "lxc", name, tail);
+	if (ret < 0 || ret >= MAXPATHLEN) // can't happen
+		goto fail;
 
-	return 0;
+	retpath = strdup(path);
+
+	return retpath;
+
+next:
+	endmntent(file);
+	i++;
+	goto again;
+
+fail:
+	endmntent(file);
+	return NULL;
 }
 
-/*
- * for each mounted cgroup, create a cgroup for the container and attach a pid
- */
-int lxc_cgroup_create(const char *name, pid_t pid)
+int lxc_cgroup_enter(const char *cgpath, pid_t pid)
 {
+	char path[MAXPATHLEN], initpath[MAXPATHLEN], *init;
+	FILE *file = NULL, *fout;
 	struct mntent *mntent;
-	FILE *file = NULL;
-	int err = -1;
-	int found = 0;
+	int ret, retv = -1;
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -595,31 +727,30 @@ int lxc_cgroup_create(const char *name, pid_t pid)
 	}
 
 	while ((mntent = getmntent(file))) {
-		DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
-
 		if (strcmp(mntent->mnt_type, "cgroup"))
 			continue;
 		if (!mount_has_subsystem(mntent))
 			continue;
-
-		INFO("[%d] found cgroup mounted at '%s',opts='%s'",
-		     ++found, mntent->mnt_dir, mntent->mnt_opts);
-
-		err = lxc_one_cgroup_create(name, mntent, pid);
-		if (err)
+		init = get_init_cgroup(NULL, mntent, initpath, 1);
+		ret = snprintf(path, MAXPATHLEN, "%s%s/%s/tasks",
+			       mntent->mnt_dir, init, cgpath);
+		if (ret < 0 || ret >= MAXPATHLEN) {
+			ERROR("entering cgroup");
 			goto out;
-
-		err = lxc_one_cgroup_attach(name, mntent, pid);
-		if (err)
+		}
+		fout = fopen(path, "w");
+		if (!fout) {
+			ERROR("entering cgroup");
 			goto out;
-	};
-
-	if (!found)
-		ERROR("No cgroup mounted on the system");
+		}
+		fprintf(fout, "%d\n", (int)pid);
+		fclose(fout);
+	}
+	retv = 0;
 
 out:
 	endmntent(file);
-	return err;
+	return retv;
 }
 
 int recursive_rmdir(char *dirname)
@@ -667,16 +798,14 @@ int recursive_rmdir(char *dirname)
 
 }
 
-int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
+static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath)
 {
 	char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN];
 	char *cgmnt = mntent->mnt_dir;
-	int flags = get_cgroup_flags(mntent);
 	int rc;
 
-	rc = snprintf(cgname, MAXPATHLEN, "%s%s%s/%s", cgmnt,
-		get_init_cgroup(NULL, mntent, initcgroup),
-		(flags & CGROUP_NS_CGROUP) ? "" : "/lxc", name);
+	rc = snprintf(cgname, MAXPATHLEN, "%s%s/%s", cgmnt,
+		get_init_cgroup(NULL, mntent, initcgroup, 1), cgpath);
 	if (rc < 0 || rc >= MAXPATHLEN) {
 		ERROR("name too long");
 		return -1;
@@ -695,11 +824,11 @@ int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
 /*
  * for each mounted cgroup, destroy the cgroup for the container
  */
-int lxc_cgroup_destroy(const char *name)
+int lxc_cgroup_destroy(const char *cgpath)
 {
 	struct mntent *mntent;
 	FILE *file = NULL;
-	int err = -1;
+	int err, retv  = 0;
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -713,168 +842,24 @@ int lxc_cgroup_destroy(const char *name)
 		if (!mount_has_subsystem(mntent))
 			continue;
 
-		err = lxc_one_cgroup_destroy(mntent, name);
-		if (err)
-			break;
+		err = lxc_one_cgroup_destroy(mntent, cgpath);
+		if (err)  // keep trying to clean up the others
+			retv = -1;
 	}
 
 	endmntent(file);
-	return err;
-}
-/*
- * lxc_cgroup_path_get: put into *path the pathname for
- * %subsystem and cgroup %name.  If %subsystem is NULL, then
- * the first mounted cgroup will be used (for nr_tasks)
- */
-int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name)
-{
-	static char        buf[MAXPATHLEN];
-	static char        retbuf[MAXPATHLEN];
-	int rc;
-
-	/* lxc_cgroup_set passes a state object for the subsystem,
-	 * so trim it to just the subsystem part */
-	if (subsystem) {
-		rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
-		if (rc < 0 || rc >= MAXPATHLEN) {
-			ERROR("subsystem name too long");
-			return -1;
-		}
-		char *s = index(retbuf, '.');
-		if (s)
-			*s = '\0';
-		DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, name);
-	}
-	if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
-		ERROR("cgroup is not mounted");
-		return -1;
-	}
-
-	rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, name);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("name too long");
-		return -1;
-	}
-
-	DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
-
-	*path = retbuf;
-	return 0;
+	return retv;
 }
 
-int lxc_cgroup_set(const char *name, const char *filename, const char *value)
+int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
 {
-	int fd, ret;
-	char *dirpath;
-	char path[MAXPATHLEN];
-	int rc;
+	const char *dirpath;
 
-	ret = lxc_cgroup_path_get(&dirpath, filename, name);
-	if (ret)
-		return -1;
-
-	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		return -1;
-	}
-
-	fd = open(path, O_WRONLY);
-	if (fd < 0) {
-		ERROR("open %s : %s", path, strerror(errno));
+	if (lxc_get_cgpath(&dirpath, NULL, name, lxcpath) < 0) {
+		ERROR("Error getting cgroup for container %s: %s", lxcpath, name);
 		return -1;
 	}
+	INFO("joining pid %d to cgroup %s", pid, dirpath);
 
-	ret = write(fd, value, strlen(value));
-	if (ret < 0) {
-		ERROR("write %s : %s", path, strerror(errno));
-		goto out;
-	}
-
-	ret = 0;
-out:
-	close(fd);
-	return ret;
-}
-
-/*
- * If you pass in NULL value or 0 len, then you are asking for the size
- * of the file.  Note that we can't get the file size quickly through stat
- * or lseek.  Therefore if you pass in len > 0 but less than the file size,
- * your only indication will be that the return value will be equal to the
- * passed-in ret.  We will not return the actual full file size.
- */
-int lxc_cgroup_get(const char *name, const char *filename,
-		   char *value, size_t len)
-{
-	int fd, ret = -1;
-	char *dirpath;
-	char path[MAXPATHLEN];
-	int rc;
-
-	ret = lxc_cgroup_path_get(&dirpath, filename, name);
-	if (ret)
-		return -1;
-
-	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		return -1;
-	}
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		ERROR("open %s : %s", path, strerror(errno));
-		return -1;
-	}
-
-    if (!len || !value) {
-        char buf[100];
-        int count = 0;
-        while ((ret = read(fd, buf, 100)) > 0)
-            count += ret;
-        if (ret >= 0)
-            ret = count;
-    } else {
-        memset(value, 0, len);
-        ret = read(fd, value, len);
-    }
-
-	if (ret < 0)
-		ERROR("read %s : %s", path, strerror(errno));
-
-	close(fd);
-	return ret;
-}
-
-int lxc_cgroup_nrtasks(const char *name)
-{
-	char *dpath;
-	char path[MAXPATHLEN];
-	int pid, ret, count = 0;
-	FILE *file;
-	int rc;
-
-	ret = lxc_cgroup_path_get(&dpath, NULL, name);
-	if (ret)
-		return -1;
-
-	rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		return -1;
-	}
-
-	file = fopen(path, "r");
-	if (!file) {
-		SYSERROR("fopen '%s' failed", path);
-		return -1;
-	}
-
-	while (fscanf(file, "%d", &pid) != EOF)
-		count++;
-
-	fclose(file);
-
-	return count;
+	return lxc_cgroup_enter(dirpath, pid);
 }
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 8167f39..709615a 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -26,13 +26,13 @@
 #define MAXPRIOLEN 24
 
 struct lxc_handler;
-extern int lxc_cgroup_create(const char *name, pid_t pid);
-extern int lxc_cgroup_destroy(const char *name);
-extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name);
-extern int lxc_cgroup_nrtasks(const char *name);
-extern int lxc_cgroup_attach(const char *name, pid_t pid);
-extern int lxc_cgroup_prepare_attach(const char *name, void **data);
-extern int lxc_cgroup_finish_attach(void *data, pid_t pid);
-extern int lxc_cgroup_dispose_attach(void *data);
-extern int lxc_ns_is_mounted(void);
+extern int lxc_cgroup_destroy(const char *cgpath);
+extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name,
+			      const char *lxcpath);
+extern int lxc_cgroup_nrtasks(const char *cgpath);
+extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
+extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
+extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
+extern int cgroup_path_get(char **path, const char *subsystem, const char *cgpath);
+extern int lxc_get_cgpath(const char **path, const char *subsystem, const char *name, const char *lxcpath);
 #endif
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index 83e7df1..de9a7c4 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -84,10 +84,19 @@ static int fill_sock_name(char *path, int len, const char *name,
 static int receive_answer(int sock, struct lxc_answer *answer)
 {
 	int ret;
+	static char answerpath[MAXPATHLEN];
 
 	ret = lxc_af_unix_recv_fd(sock, &answer->fd, answer, sizeof(*answer));
 	if (ret < 0)
 		ERROR("failed to receive answer for the command");
+	if (answer->pathlen == 0)
+		return ret;
+	ret = recv(sock, answerpath, answer->pathlen, 0);
+	if (ret != answer->pathlen) {
+		ERROR("failed to receive answer for the command");
+		ret = 0;
+	} else
+		answer->path = answerpath;
 
 	return ret;
 }
@@ -202,6 +211,7 @@ extern int  lxc_stop_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_state_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_pid_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_clone_flags_callback(int, struct lxc_request *, struct lxc_handler *);
+extern int lxc_cgroup_callback(int, struct lxc_request *, struct lxc_handler *);
 
 static int trigger_command(int fd, struct lxc_request *request,
 			   struct lxc_handler *handler)
@@ -214,6 +224,7 @@ static int trigger_command(int fd, struct lxc_request *request,
 		[LXC_COMMAND_STATE]       = lxc_state_callback,
 		[LXC_COMMAND_PID]         = lxc_pid_callback,
 		[LXC_COMMAND_CLONE_FLAGS] = lxc_clone_flags_callback,
+		[LXC_COMMAND_CGROUP]      = lxc_cgroup_callback,
 	};
 
 	if (request->type < 0 || request->type >= LXC_COMMAND_MAX)
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
index 0b72cf1..8d77ce8 100644
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -29,6 +29,7 @@ enum {
 	LXC_COMMAND_STATE,
 	LXC_COMMAND_PID,
 	LXC_COMMAND_CLONE_FLAGS,
+	LXC_COMMAND_CGROUP,
 	LXC_COMMAND_MAX,
 };
 
@@ -41,6 +42,8 @@ struct lxc_answer {
 	int fd;
 	int ret; /* 0 on success, -errno on failure */
 	pid_t pid;
+	int pathlen;
+	const char *path;
 };
 
 struct lxc_command {
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 7d70c97..add3c74 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -1375,7 +1375,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
 	return 0;
 }
 
-int setup_cgroup(const char *name, struct lxc_list *cgroups)
+int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
 {
 	struct lxc_list *iterator;
 	struct lxc_cgroup *cg;
@@ -1388,8 +1388,11 @@ int setup_cgroup(const char *name, struct lxc_list *cgroups)
 
 		cg = iterator->elem;
 
-		if (lxc_cgroup_set(name, cg->subsystem, cg->value))
+		if (lxc_cgroup_set_bypath(cgpath, cg->subsystem, cg->value)) {
+			ERROR("Error setting %s to %s for %s\n", cg->subsystem,
+				cg->value, cgpath);
 			goto out;
+		}
 
 		DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
 	}
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 4c48b46..f20fb2f 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -282,7 +282,7 @@ struct lxc_conf {
 
 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf);
 
-extern int setup_cgroup(const char *name, struct lxc_list *cgroups);
+extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
 extern int detect_shared_rootfs(void);
 
 /*
diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c
index e8b1311..261c2c4 100644
--- a/src/lxc/freezer.c
+++ b/src/lxc/freezer.c
@@ -40,16 +40,11 @@
 
 lxc_log_define(lxc_freezer, lxc);
 
-static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+static int do_unfreeze(const char *nsgroup, int freeze, const char *name, const char *lxcpath)
 {
-	char *nsgroup;
 	char freezer[MAXPATHLEN], *f;
 	char tmpf[32];
 	int fd, ret;
-	
-	ret = lxc_cgroup_path_get(&nsgroup, "freezer", name);
-	if (ret)
-		return -1;
 
 	ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", nsgroup);
 	if (ret >= MAXPATHLEN) {
@@ -59,7 +54,7 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
 
 	fd = open(freezer, O_RDWR);
 	if (fd < 0) {
-		SYSERROR("failed to open freezer for '%s'", name);
+		SYSERROR("failed to open freezer at '%s'", nsgroup);
 		return -1;
 	}
 
@@ -98,7 +93,8 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
 		ret = strncmp(f, tmpf, strlen(f));
 		if (!ret)
 		{
-			lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
+			if (name)
+				lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
 			break;		/* Success */
 		}
 
@@ -122,6 +118,18 @@ out:
 	return ret;
 }
 
+static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+{
+	char *nsgroup;
+	int ret;
+	
+	ret = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath);
+	if (ret)
+		return -1;
+
+	return do_unfreeze(nsgroup, freeze, name, lxcpath);
+}
+
 int lxc_freeze(const char *name, const char *lxcpath)
 {
 	lxc_monitor_send_state(name, FREEZING, lxcpath);
@@ -133,3 +141,14 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
 	return freeze_unfreeze(name, 0, lxcpath);
 }
 
+int lxc_unfreeze_bypath(const char *cgpath)
+{
+	char *nsgroup;
+	int ret;
+	
+	ret = cgroup_path_get(&nsgroup, "freezer", cgpath);
+	if (ret)
+		return -1;
+
+	return do_unfreeze(nsgroup, 0, NULL, NULL);
+}
diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
index 0fa9480..784b3ca 100644
--- a/src/lxc/lxc.h
+++ b/src/lxc/lxc.h
@@ -118,6 +118,14 @@ extern int lxc_freeze(const char *name, const char *lxcpath);
 extern int lxc_unfreeze(const char *name, const char *lxcpath);
 
 /*
+ * Unfreeze all previously frozen tasks.
+ * This is the function to use from inside the monitor
+ * @name : the name of the container
+ * Return 0 on sucess, < 0 otherwise
+ */
+extern int lxc_unfreeze_bypath(const char *cgpath);
+
+/*
  * Retrieve the container state
  * @name : the name of the container
  * Returns the state of the container on success, < 0 otherwise
@@ -127,24 +135,36 @@ extern lxc_state_t lxc_state(const char *name, const char *lxcpath);
 /*
  * Set a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
+ * @cgpath    : the cgroup path of the container
+ * @filename  : the cgroup attribute filename
+ * @value     : the value to be set
+ * Returns 0 on success, < 0 otherwise
+ */
+extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value);
+
+/*
+ * Set a specified value for a specified subsystem. The specified
+ * subsystem must be fully specified, eg. "cpu.shares"
  * @name      : the name of the container
- * @filename : the cgroup attribute filename
+ * @filename  : the cgroup attribute filename
  * @value     : the value to be set
+ * @lxcpath   : lxc config path for container
  * Returns 0 on success, < 0 otherwise
  */
-extern int lxc_cgroup_set(const char *name, const char *filename, const char *value);
+extern int lxc_cgroup_set(const char *name, const char *filename, const char *value, const char *lxcpath);
 
 /*
  * Get a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
  * @name      : the name of the container
- * @filename : the cgroup attribute filename
+ * @filename  : the cgroup attribute filename
  * @value     : the value to be set
  * @len       : the len of the value variable
+ * @lxcpath   : lxc config path for container
  * Returns the number of bytes read, < 0 on error
  */
 extern int lxc_cgroup_get(const char *name, const char *filename,
-			  char *value, size_t len);
+			  char *value, size_t len, const char *lxcpath);
 
 /*
  * Retrieve the error string associated with the error returned by
diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
index 1f60266..af920be 100644
--- a/src/lxc/lxc_attach.c
+++ b/src/lxc/lxc_attach.c
@@ -237,7 +237,7 @@ int main(int argc, char *argv[])
 		}
 
 		if (!elevated_privileges) {
-			ret = lxc_cgroup_attach(my_args.name, grandchild);
+			ret = lxc_cgroup_attach(grandchild, my_args.name, my_args.lxcpath);
 			if (ret < 0) {
 				ERROR("failed to attach process to cgroup");
 				return -1;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 970391b..4a55633 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -78,7 +78,7 @@ int main(int argc, char *argv[])
 		value = my_args.argv[1];
 
 	if (value) {
-		if (lxc_cgroup_set(my_args.name, state_object, value)) {
+		if (lxc_cgroup_set(my_args.name, state_object, value, my_args.lxcpath)) {
 			ERROR("failed to assign '%s' value to '%s' for '%s'",
 				value, state_object, my_args.name);
 			return -1;
@@ -88,7 +88,7 @@ int main(int argc, char *argv[])
 		int ret;
 		char buffer[len];
 
-		ret = lxc_cgroup_get(my_args.name, state_object, buffer, len);
+		ret = lxc_cgroup_get(my_args.name, state_object, buffer, len, my_args.lxcpath);
 		if (ret < 0) {
 			ERROR("failed to retrieve value of '%s' for '%s'",
 			      state_object, my_args.name);
diff --git a/src/lxc/lxc_unshare.c b/src/lxc/lxc_unshare.c
index d92a96a..c5d3332 100644
--- a/src/lxc/lxc_unshare.c
+++ b/src/lxc/lxc_unshare.c
@@ -112,7 +112,6 @@ int main(int argc, char *argv[])
 {
 	int opt, status;
 	int ret;
-	char *pid_name;
 	char *namespaces = NULL;
 	char **args;
 	int flags = 0;
@@ -170,14 +169,5 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	if (lxc_ns_is_mounted()) {
-		if (asprintf(&pid_name, "%d", pid) == -1) {
-			ERROR("pid_name: failed to allocate memory");
-			return -1;
-		}
-		lxc_cgroup_destroy(pid_name);
-		free(pid_name);
-	}
-
 	return  lxc_error_set_and_log(pid, status);
 }
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 404f60a..6569071 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -945,7 +945,7 @@ static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys,
 	if (is_stopped_nolock(c))
 		goto err;
 
-	ret = lxc_cgroup_set(c->name, subsys, value);
+	ret = lxc_cgroup_set(c->name, subsys, value, c->config_path);
 	if (!ret)
 		b = true;
 err:
@@ -966,7 +966,7 @@ static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, c
 	if (is_stopped_nolock(c))
 		goto out;
 
-	ret = lxc_cgroup_get(c->name, subsys, retv, inlen);
+	ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path);
 
 out:
 	lxcunlock(c->privlock);
diff --git a/src/lxc/lxcutmp.c b/src/lxc/lxcutmp.c
index 4e74904..54db903 100644
--- a/src/lxc/lxcutmp.c
+++ b/src/lxc/lxcutmp.c
@@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
 {
 	int ntasks;
 
-	ntasks = lxc_cgroup_nrtasks(handler->name);
+	ntasks = lxc_cgroup_nrtasks(handler->cgroup);
 
 	if (ntasks < 0) {
 		ERROR("failed to get the number of tasks");
diff --git a/src/lxc/start.c b/src/lxc/start.c
index de431be..f0e82a3 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -268,6 +268,7 @@ int lxc_pid_callback(int fd, struct lxc_request *request,
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.pid = handler->pid;
 	answer.ret = 0;
 
@@ -285,12 +286,49 @@ int lxc_pid_callback(int fd, struct lxc_request *request,
 	return 0;
 }
 
+int lxc_cgroup_callback(int fd, struct lxc_request *request,
+		     struct lxc_handler *handler)
+{
+	struct lxc_answer answer;
+	int ret;
+
+	memset(&answer, 0, sizeof(answer));
+	answer.pathlen = strlen(handler->cgroup) + 1;
+	answer.path = handler->cgroup;
+	answer.ret = 0;
+
+	ret = send(fd, &answer, sizeof(answer), 0);
+	if (ret < 0) {
+		WARN("failed to send answer to the peer");
+		return -1;
+	}
+
+	if (ret != sizeof(answer)) {
+		ERROR("partial answer sent");
+		return -1;
+	}
+
+	ret = send(fd, answer.path, answer.pathlen, 0);
+	if (ret < 0) {
+		WARN("failed to send answer to the peer");
+		return -1;
+	}
+
+	if (ret != answer.pathlen) {
+		ERROR("partial answer sent");
+		return -1;
+	}
+
+	return 0;
+}
+
 int lxc_clone_flags_callback(int fd, struct lxc_request *request,
 			     struct lxc_handler *handler)
 {
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.pid = 0;
 	answer.ret = handler->clone_flags;
 
@@ -467,7 +505,7 @@ out_free:
 	return NULL;
 }
 
-void lxc_fini(const char *name, struct lxc_handler *handler)
+static void lxc_fini(const char *name, struct lxc_handler *handler)
 {
 	/* The STOPPING state is there for future cleanup code
 	 * which can take awhile
@@ -487,6 +525,11 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
 	close(handler->conf->maincmd_fd);
 	handler->conf->maincmd_fd = -1;
 	free(handler->name);
+	if (handler->cgroup) {
+		lxc_cgroup_destroy(handler->cgroup);
+		free(handler->cgroup);
+		handler->cgroup = NULL;
+	}
 	free(handler);
 }
 
@@ -756,7 +799,11 @@ int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
 		failed_before_rename = 1;
 
-	if (lxc_cgroup_create(name, handler->pid))
+	/* TODO - pass lxc.cgroup.dir (or user's pam cgroup) in for first argument */
+	if ((handler->cgroup = lxc_cgroup_path_create(NULL, name)) == NULL)
+		goto out_delete_net;
+	
+	if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0)
 		goto out_delete_net;
 
 	if (failed_before_rename)
@@ -786,7 +833,7 @@ int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
 		goto out_delete_net;
 
-	if (setup_cgroup(name, &handler->conf->cgroup)) {
+	if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) {
 		ERROR("failed to setup the cgroups for '%s'", name);
 		goto out_delete_net;
 	}
@@ -904,7 +951,6 @@ out_fini:
 	lxc_delete_network(handler);
 
 out_fini_nonet:
-	lxc_cgroup_destroy(name);
 	lxc_fini(name, handler);
 	return err;
 
diff --git a/src/lxc/start.h b/src/lxc/start.h
index a0afa87..5b3488f 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -51,6 +51,7 @@ struct lxc_handler {
 #endif
 	int pinfd;
 	const char *lxcpath;
+	char *cgroup;
 };
 
 extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
@@ -58,7 +59,6 @@ extern int lxc_spawn(struct lxc_handler *);
 
 extern int lxc_poll(const char *name, struct lxc_handler *handler);
 extern void lxc_abort(const char *name, struct lxc_handler *handler);
-extern void lxc_fini(const char *name, struct lxc_handler *handler);
 extern int lxc_set_state(const char *, struct lxc_handler *, lxc_state_t);
 extern int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore);
 int __lxc_start(const char *, struct lxc_conf *, struct lxc_operations *,
diff --git a/src/lxc/state.c b/src/lxc/state.c
index 9fcb06d..169aaf5 100644
--- a/src/lxc/state.c
+++ b/src/lxc/state.c
@@ -66,7 +66,7 @@ lxc_state_t lxc_str2state(const char *state)
 	return -1;
 }
 
-static int freezer_state(const char *name)
+static int freezer_state(const char *name, const char *lxcpath)
 {
 	char *nsgroup;
 	char freezer[MAXPATHLEN];
@@ -74,7 +74,7 @@ static int freezer_state(const char *name)
 	FILE *file;
 	int err;
 
-	err = lxc_cgroup_path_get(&nsgroup, "freezer", name);
+	err = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath);
 	if (err)
 		return -1;
 
@@ -132,7 +132,7 @@ static lxc_state_t __lxc_getstate(const char *name, const char *lxcpath)
 
 lxc_state_t lxc_getstate(const char *name, const char *lxcpath)
 {
-	int state = freezer_state(name);
+	int state = freezer_state(name, lxcpath);
 	if (state != FROZEN && state != FREEZING)
 		state = __lxc_getstate(name, lxcpath);
 	return state;
@@ -148,6 +148,7 @@ extern int lxc_state_callback(int fd, struct lxc_request *request,
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.ret = handler->state;
 
 	ret = send(fd, &answer, sizeof(answer), 0);
diff --git a/src/lxc/stop.c b/src/lxc/stop.c
index c796283..851a4bf 100644
--- a/src/lxc/stop.c
+++ b/src/lxc/stop.c
@@ -83,9 +83,10 @@ extern int lxc_stop_callback(int fd, struct lxc_request *request,
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.ret = kill(handler->pid, SIGKILL);
 	if (!answer.ret) {
-		ret = lxc_unfreeze(handler->name, handler->lxcpath);
+		ret = lxc_unfreeze_bypath(handler->cgroup);
 		if (!ret)
 			return 0;
 
diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index 0f4ee1d..b9f74f0 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -12,6 +12,7 @@ lxc_test_shutdowntest_SOURCES = shutdowntest.c
 lxc_test_get_item_SOURCES = get_item.c
 lxc_test_getkeys_SOURCES = getkeys.c
 lxc_test_lxcpath_SOURCES = lxcpath.c
+lxc_test_cgpath_SOURCES = cgpath.c
 
 AM_CFLAGS=-I$(top_srcdir)/src \
 	-DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
@@ -21,6 +22,7 @@ AM_CFLAGS=-I$(top_srcdir)/src \
 
 bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \
 	lxc-test-destroytest lxc-test-saveconfig lxc-test-createtest \
-	lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys lxc-test-lxcpath
+	lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys lxc-test-lxcpath \
+	lxc-test-cgpath
 
 endif
diff --git a/src/tests/cgpath.c b/src/tests/cgpath.c
new file mode 100644
index 0000000..f05c435
--- /dev/null
+++ b/src/tests/cgpath.c
@@ -0,0 +1,164 @@
+/* liblxcapi
+ *
+ * Copyright © 2012 Serge Hallyn <serge.hallyn at ubuntu.com>.
+ * Copyright © 2012 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include "../lxc/lxccontainer.h"
+
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "../lxc/cgroup.h"
+#include "../lxc/lxc.h"
+
+#define MYNAME "lxctest1"
+#define MYNAME2 "lxctest2"
+
+#define TSTERR(x) do { \
+	fprintf(stderr, "%d: %s\n", __LINE__, x); \
+} while (0)
+
+int main()
+{
+	struct lxc_container *c = NULL, *c2 = NULL;
+	char *path;
+	int len;
+	int ret, retv = -1;
+
+	/* won't require privilege necessarily once users are classified by
+	 * pam_cgroup */
+	if (geteuid() != 0) {
+		TSTERR("requires privilege");
+		exit(0);
+	}
+
+	printf("Basic cgroup path tests...\n");
+	path = lxc_cgroup_path_create(NULL, MYNAME);
+	len = strlen(path);
+	if (!path || !len) {
+		TSTERR("zero result from lxc_cgroup_path_create");
+		exit(1);
+	}
+	if (!strstr(path, "lxc/" MYNAME)) {
+		TSTERR("lxc_cgroup_path_create NULL lxctest1");
+		exit(1);
+	}
+	free(path);
+
+	path = lxc_cgroup_path_create("ab", MYNAME);
+	len = strlen(path);
+	if (!path || !len) {
+		TSTERR("zero result from lxc_cgroup_path_create");
+		exit(1);
+	}
+	if (!strstr(path, "ab/" MYNAME)) {
+		TSTERR("lxc_cgroup_path_create ab lxctest1");
+		exit(1);
+	}
+	free(path);
+	printf("... passed\n");
+
+	printf("Container creation tests...\n");
+
+	if ((c = lxc_container_new(MYNAME, NULL)) == NULL) {
+		TSTERR("instantiating first container");
+		exit(1);
+	}
+	if (c->is_defined(c)) {
+		c->stop(c);
+		c->destroy(c);
+		c = lxc_container_new(MYNAME, NULL);
+	}
+	c->set_config_item(c, "lxc.network.type", "empty");
+	if (!c->createl(c, "ubuntu", NULL)) {
+		TSTERR("creating first container");
+		exit(1);
+	}
+	c->load_config(c, NULL);
+	c->want_daemonize(c);
+	if (!c->startl(c, 0, NULL)) {
+		TSTERR("starting first container");
+		goto out;
+	}
+	printf("first container passed.  Now two containers...\n");
+
+	char *nsgroup;
+#define ALTBASE "/var/lib/lxctest2"
+	ret = mkdir(ALTBASE, 0755);
+
+	ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME, c->get_config_path(c));
+	if (ret < 0 || !strstr(nsgroup, "lxc/" MYNAME)) {
+		TSTERR("getting first cgroup path from lxc_command");
+		goto out;
+	}
+
+	/* start second container */
+	if ((c2 = lxc_container_new(MYNAME2, ALTBASE)) == NULL) {
+		TSTERR("instantiating first container");
+		goto out;
+	}
+	if (c2->is_defined(c2)) {
+		c2->stop(c2);
+		c2->destroy(c2);
+		c2 = lxc_container_new(MYNAME2, ALTBASE);
+	}
+	c2->set_config_item(c2, "lxc.network.type", "empty");
+	if (!c2->createl(c2, "ubuntu", NULL)) {
+		TSTERR("creating first container");
+		goto out;
+	}
+
+	c2->load_config(c2, NULL);
+	c2->want_daemonize(c2);
+	if (!c2->startl(c2, 0, NULL)) {
+		TSTERR("starting first container");
+		goto out;
+	}
+
+	ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME2, c2->get_config_path(c2));
+	if (ret < 0 || !strstr(nsgroup, "lxc/" MYNAME2)) {
+		TSTERR("getting second cgroup path from lxc_command");
+		goto out;
+	}
+
+	const char *dirpath;
+	if (lxc_get_cgpath(&dirpath, NULL, c2->name, c2->config_path) < 0) {
+		TSTERR("getting second container's cgpath");
+		return -1;
+	}
+
+	if (lxc_cgroup_nrtasks(dirpath) < 1) {
+		TSTERR("getting nrtasks");
+		goto out;
+	}
+	printf("...passed\n");
+
+	retv = 0;
+out:
+	if (c2) {
+		c2->stop(c2);
+		c2->destroy(c2);
+	}
+	if (c) {
+		c->stop(c);
+		c->destroy(c);
+	}
+	return retv;
+}
diff --git a/src/tests/lxcpath.c b/src/tests/lxcpath.c
index 92525ed..a849a38 100644
--- a/src/tests/lxcpath.c
+++ b/src/tests/lxcpath.c
@@ -29,7 +29,7 @@
 #define MYNAME "lxctest1"
 
 #define TSTERR(x) do { \
-	fprintf(stderr, "%d: %s", __LINE__, x); \
+	fprintf(stderr, "%d: %s\n", __LINE__, x); \
 } while (0)
 
 int main()
-- 
1.8.1.2





More information about the lxc-devel mailing list