[lxc-devel] [PATCH RFC] Cgroups: rework to support groups and multiple simultaneous same-named containers

Serge Hallyn serge.hallyn at ubuntu.com
Fri Feb 15 21:49:42 UTC 2013


(This is just to show where I ended up in trying to support this
feature.  It rewrites more code than I'd like, but the end result
seemed to work and do what we want.)

Add a monitor command to get the cgroup for a running container.  This
allows container r1 started from /var/lib/lxc and container r1 started
from /home/ubuntu/lxcbase to pick unique cgroup directories (which
will be /sys/fs/cgroup/$subsys/lxc/r1 and .../r1-1), and all the lxc-*
tools to get that path over the monitor at lxcpath.

Rework the cgroup code.  Before, if /sys/fs/cgroup/$subsys/lxc/r1
already existed, it would be moved to 'deadXXXXX', and a new r1 created.
Instead, take r1-1, r1-2, etc.

I ended up removing both the use of cgroup.clone_children and support
for ns cgroup.  Presumably we'll want to put support for ns cgroup
back in for older kernels.

Instead of using clone_children, I manually copy the parent's cpuset
mems and cpus files (the only ones requiring clone_children).

Note that upstream kernel is working toward strict hierarchical
limit enforcements, which will be good for us.

NOTE - I am changing the lxc_answer struct size.  This means that
upgrades to this version while containers are running will result
in lxc_* commands on pre-running containers will fail.

(NOTE2 - ignore the testcase for now...  I did start with it,
but as I was working out some monitor bugs I let it languish)

NOTE3 - oh yeah, lxc-attach's cgroup support still needs to be
reimplemented.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/cgroup.c       | 817 ++++++++++++++++++++-----------------------------
 src/lxc/cgroup.h       |  17 +-
 src/lxc/commands.c     |  11 +
 src/lxc/commands.h     |   3 +
 src/lxc/conf.c         |   7 +-
 src/lxc/conf.h         |   2 +-
 src/lxc/freezer.c      |  35 ++-
 src/lxc/lxc.h          |  28 +-
 src/lxc/lxc_attach.c   |  96 ++++--
 src/lxc/lxc_cgroup.c   |   4 +-
 src/lxc/lxc_unshare.c  |  10 -
 src/lxc/lxccontainer.c |   4 +-
 src/lxc/start.c        |  54 +++-
 src/lxc/start.h        |   2 +-
 src/lxc/state.c        |   7 +-
 src/lxc/stop.c         |   3 +-
 src/tests/Makefile.am  |   4 +-
 src/tests/cgpath.c     | 142 +++++++++
 src/tests/lxcpath.c    |   2 +-
 19 files changed, 692 insertions(+), 556 deletions(-)
 create mode 100644 src/tests/cgpath.c

diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 28f3474..7b13753 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -38,6 +38,7 @@
 
 #include "error.h"
 #include "config.h"
+#include "commands.h"
 
 #include <lxc/log.h>
 #include <lxc/cgroup.h>
@@ -53,11 +54,6 @@ lxc_log_define(lxc_cgroup, lxc);
 
 #define MTAB "/proc/mounts"
 
-enum {
-	CGROUP_NS_CGROUP = 1,
-	CGROUP_CLONE_CHILDREN,
-};
-
 /* Check if a mount is a cgroup hierarchy for any subsystem.
  * Return the first subsystem found (or NULL if none).
  */
@@ -104,11 +100,12 @@ static char *mount_has_subsystem(const struct mntent *mntent)
  * Returns a pointer to the answer, which may be "".
  */
 static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
-			     char *dsg)
+			     char *dsg, int prependslash)
 {
 	FILE *f;
 	char *c, *c2;
 	char line[MAXPATHLEN];
+	int ret;
 
 	*dsg = '\0';
 	f = fopen("/proc/1/cgroup", "r");
@@ -134,10 +131,18 @@ static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
 good:
 		DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n",
 			subsystem, c2);
-		strncpy(dsg, c2, MAXPATHLEN);
-		c = &dsg[strlen(dsg)-1];
+		ret = snprintf(dsg, MAXPATHLEN, "%s%s", prependslash ? "/" : "", c2);
+		if (ret < 0 || ret >= MAXPATHLEN) {
+			WARN("init cgroup path name was too long.");
+			goto found;
+		}
+		if (ret < 1)
+			goto found;
+
+		c = &dsg[ret-1];
 		if (*c == '\n')
 			*c = '\0';
+
 		goto found;
 	}
 
@@ -146,27 +151,15 @@ found:
 	return dsg;
 }
 
-static int get_cgroup_flags(struct mntent *mntent)
-{
-	int flags = 0;
-
-
-	if (hasmntopt(mntent, "ns"))
-		flags |= CGROUP_NS_CGROUP;
-
-	if (hasmntopt(mntent, "clone_children"))
-		flags |= CGROUP_CLONE_CHILDREN;
-
-	DEBUG("cgroup %s has flags 0x%x", mntent->mnt_dir, flags);
-	return flags;
-}
-
+/*
+ * Return cgroup mountpoint plus init cgroup
+ */
 static int get_cgroup_mount(const char *subsystem, char *mnt)
 {
 	struct mntent *mntent;
-	char initcgroup[MAXPATHLEN];
+	char initcgroup[MAXPATHLEN], *init;
 	FILE *file = NULL;
-	int ret, flags, err = -1;
+	int ret, err = -1;
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -181,16 +174,14 @@ static int get_cgroup_mount(const char *subsystem, char *mnt)
 		if (subsystem) {
 			if (!hasmntopt(mntent, subsystem))
 				continue;
-		}
-		else {
+		} else {
 			if (!mount_has_subsystem(mntent))
 				continue;
 		}
 
-		flags = get_cgroup_flags(mntent);
-		ret = snprintf(mnt, MAXPATHLEN, "%s%s%s", mntent->mnt_dir,
-			       get_init_cgroup(subsystem, NULL, initcgroup),
-		               (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
+		init = get_init_cgroup(NULL, mntent, initcgroup, 1);
+		ret = snprintf(mnt, MAXPATHLEN, "%s%s", mntent->mnt_dir,
+				init);
 		if (ret < 0 || ret >= MAXPATHLEN)
 			goto fail;
 
@@ -207,180 +198,285 @@ out:
 	return err;
 }
 
-int lxc_ns_is_mounted(void)
+/*
+ * cgroup_path_get: put into *path the pathname for
+ * %subsystem and cgroup %name.  If %subsystem is NULL, then
+ * the first mounted cgroup will be used (for nr_tasks)
+ */
+int cgroup_path_get(char **path, const char *subsystem, const char *cgpath)
 {
 	static char        buf[MAXPATHLEN];
+	static char        retbuf[MAXPATHLEN];
+	int rc;
+
+	/* lxc_cgroup_set passes a state object for the subsystem,
+	 * so trim it to just the subsystem part */
+	if (subsystem) {
+		rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
+		if (rc < 0 || rc >= MAXPATHLEN) {
+			ERROR("subsystem name too long");
+			return -1;
+		}
+		char *s = index(retbuf, '.');
+		if (s)
+			*s = '\0';
+		DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, cgpath);
+	}
+	if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
+		ERROR("cgroup is not mounted");
+		return -1;
+	}
 
-	return (get_cgroup_mount("ns", buf) == 0);
+	rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, cgpath);
+	if (rc < 0 || rc >= MAXPATHLEN) {
+		ERROR("name too long");
+		return -1;
+	}
+
+	DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
+
+	*path = retbuf;
+	return 0;
 }
 
-static int cgroup_rename_nsgroup(const char *mnt, const char *name, pid_t pid)
+
+/*
+ * lxc_cgroup_path_get: put into *path the pathname for
+ * %subsystem and cgroup %name.  If %subsystem is NULL, then
+ * the first mounted cgroup will be used (for nr_tasks)
+ *
+ * This is the exported function, which determines cgpath from the
+ * monitor running in lxcpath.
+ */
+int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name, const char *lxcpath)
 {
-	char oldname[MAXPATHLEN];
-	char newname[MAXPATHLEN];
-	int ret;
+	struct lxc_command command = {
+		.request = { .type = LXC_COMMAND_CGROUP },
+	};
 
-	ret = snprintf(oldname, MAXPATHLEN, "%s/%d", mnt, pid);
-	if (ret >= MAXPATHLEN)
-		return -1;
+	int ret, stopped = 0;
 
-	ret = snprintf(newname, MAXPATHLEN, "%s/%s", mnt, name);
-	if (ret >= MAXPATHLEN)
+	ret = lxc_command(name, &command, &stopped, lxcpath);
+	if (ret < 0 && stopped)
+		return STOPPED;
+
+	if (ret < 0) {
+		ERROR("failed to send command");
 		return -1;
+	}
 
-	if (rename(oldname, newname)) {
-		SYSERROR("failed to rename cgroup %s->%s", oldname, newname);
+	if (!ret) {
+		WARN("'%s' has stopped before sending its state", name);
 		return -1;
 	}
 
-	DEBUG("'%s' renamed to '%s'", oldname, newname);
+	if (command.answer.ret < 0 || command.answer.pathlen < 0) {
+		ERROR("failed to get state for '%s': %s",
+			name, strerror(-command.answer.ret));
+		return -1;
+	}
 
-	return 0;
+	return cgroup_path_get(path, subsystem, command.answer.path);
 }
 
-static int cgroup_enable_clone_children(const char *path)
+static int do_cgroup_set(const char *path, const char *filename, const char *value)
 {
-	FILE *f;
-	int ret = 0;
+	int fd, ret;
 
-	f = fopen(path, "w");
-	if (!f) {
-		SYSERROR("failed to open '%s'", path);
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		SYSERROR("open %s : %s", path, strerror(errno));
 		return -1;
 	}
 
-	if (fprintf(f, "1") < 1) {
-		ERROR("failed to write flag to '%s'", path);
-		ret = -1;
-	}
+	ret = write(fd, value, strlen(value));
 
-	fclose(f);
+	if (ret < 0) {
+		close(fd);
+		SYSERROR("write %s : %s", path, strerror(errno));
+		return ret;
+	}
 
-	return ret;
+	close(fd);
+	return 0;
 }
 
-static int lxc_one_cgroup_finish_attach(int fd, pid_t pid)
+int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value)
 {
-       char buf[32];
-       int ret;
+	int ret;
+	char *dirpath;
+	char path[MAXPATHLEN];
 
-       snprintf(buf, 32, "%ld", (long)pid);
+	ret = cgroup_path_get(&dirpath, filename, cgpath);
+	if (ret)
+		return -1;
 
-       ret = write(fd, buf, strlen(buf));
-       if (ret <= 0) {
-               SYSERROR("failed to write pid '%ld' to fd '%d'", (long)pid, fd);
-               ret = -1;
-       } else {
-               ret = 0;
-       }
+	ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("pathname too long");
+		return -1;
+	}
 
-       close(fd);
-       return ret;
+	return do_cgroup_set(path, filename, value);
 }
 
-static int lxc_one_cgroup_dispose_attach(int fd)
+int lxc_cgroup_set(const char *name, const char *filename, const char *value,
+		   const char *lxcpath)
 {
-       close(fd);
-       return 0;
+	int ret;
+	char *dirpath;
+	char path[MAXPATHLEN];
+
+	ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath);
+	if (ret)
+		return -1;
+
+	ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("pathname too long");
+		return -1;
+	}
+
+	return do_cgroup_set(path, filename, value);
 }
 
-static int lxc_one_cgroup_prepare_attach(const char *name,
-					 struct mntent *mntent)
+/*
+ * If you pass in NULL value or 0 len, then you are asking for the size
+ * of the file.  Note that we can't get the file size quickly through stat
+ * or lseek.  Therefore if you pass in len > 0 but less than the file size,
+ * your only indication will be that the return value will be equal to the
+ * passed-in ret.  We will not return the actual full file size.
+ */
+int lxc_cgroup_get(const char *name, const char *filename, char *value,
+		   size_t len, const char *lxcpath)
 {
-	int fd;
-	char tasks[MAXPATHLEN], initcgroup[MAXPATHLEN];
-	char *cgmnt = mntent->mnt_dir;
-	int flags;
+	int fd, ret = -1;
+	char *dirpath;
+	char path[MAXPATHLEN];
 	int rc;
 
-	flags = get_cgroup_flags(mntent);
+	ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath);
+	if (ret)
+		return -1;
 
-	rc = snprintf(tasks, MAXPATHLEN, "%s%s%s/%s/tasks", cgmnt,
-	         get_init_cgroup(NULL, mntent, initcgroup),
-	         (flags & CGROUP_NS_CGROUP) ? "" : "/lxc",
-	         name);
+	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
 	if (rc < 0 || rc >= MAXPATHLEN) {
 		ERROR("pathname too long");
 		return -1;
 	}
 
-	fd = open(tasks, O_WRONLY);
+	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		SYSERROR("failed to open '%s'", tasks);
+		ERROR("open %s : %s", path, strerror(errno));
 		return -1;
 	}
 
-	return fd;
-}
-
-static int lxc_one_cgroup_attach(const char *name, struct mntent *mntent, pid_t pid)
-{
-	int fd;
-
-	fd = lxc_one_cgroup_prepare_attach(name, mntent);
-	if (fd < 0) {
-		return -1;
+	if (!len || !value) {
+		char buf[100];
+		int count = 0;
+		while ((ret = read(fd, buf, 100)) > 0)
+			count += ret;
+		if (ret >= 0)
+			ret = count;
+	} else {
+		memset(value, 0, len);
+		ret = read(fd, value, len);
 	}
 
-	return lxc_one_cgroup_finish_attach(fd, pid);
+	if (ret < 0)
+		ERROR("read %s : %s", path, strerror(errno));
+
+	close(fd);
+	return ret;
 }
 
-int lxc_cgroup_dispose_attach(void *data)
+int lxc_cgroup_nrtasks(const char *cgpath)
 {
-	int *fds = data;
-	int ret, err;
+	char *dpath;
+	char path[MAXPATHLEN];
+	int pid, ret, count = 0;
+	FILE *file;
+	int rc;
 
-	if (!fds) {
-		return 0;
-	}
+	ret = cgroup_path_get(&dpath, NULL, cgpath);
+	if (ret)
+		return -1;
 
-	ret = 0;
+	rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
+	if (rc < 0 || rc >= MAXPATHLEN) {
+		ERROR("pathname too long");
+		return -1;
+	}
 
-	for (; *fds >= 0; fds++) {
-		err = lxc_one_cgroup_dispose_attach(*fds);
-		if (err) {
-			ret = err;
-		}
+	file = fopen(path, "r");
+	if (!file) {
+		SYSERROR("fopen '%s' failed", path);
+		return -1;
 	}
 
-	free(data);
+	while (fscanf(file, "%d", &pid) != EOF)
+		count++;
 
-	return ret;
+	fclose(file);
+
+	return count;
 }
 
-int lxc_cgroup_finish_attach(void *data, pid_t pid)
+static long get_value(const char *dir, const char *file)
 {
-	int *fds = data;
-	int err;
+	FILE *f;
+	char path[MAXPATHLEN];
+	int ret, retv;
 
-	if (!fds) {
+	retv = snprintf(path, MAXPATHLEN, "%s/%s", dir, file);
+	if (retv < 0 || retv >= MAXPATHLEN)
 		return 0;
-	}
+	f = fopen(path, "r");
+	ret = fscanf(f, "%d", &retv);
+	fclose(f);
+	if (ret != 1)
+		return 0;
+	return retv;
+}
 
-	for (; *fds >= 0; fds++) {
-		err = lxc_one_cgroup_finish_attach(*fds, pid);
-		if (err) {
-			/* get rid of the rest of them */
-			lxc_cgroup_dispose_attach(data);
-			return -1;
-		}
-		*fds = -1;
-	}
+static void set_value(const char *dir, const char *file, long v)
+{
+	FILE *f;
+	char path[MAXPATHLEN];
+	int retv;
 
-	free(data);
+	retv = snprintf(path, MAXPATHLEN, "%s/%s", dir, file);
+	if (retv < 0 || retv >= MAXPATHLEN)
+		return;
+	f = fopen(path, "w");
+	fprintf(f, "%ld\n", v);
+	fclose(f);
+}
 
-	return 0;
+static void setup_cpuset(const char *path)
+{
+	/* copy parent values for mems_allowed and cpus_allowed */
+	char *parentpath = strdup(path);
+	char *p;
+	long v;
+	if ((p = rindex(parentpath, '/')) == NULL)
+		goto out;
+	v = get_value(parentpath, "cpuset.mems");
+	set_value(path, "cpuset.mems", v);
+	v = get_value(parentpath, "cpuset.cpus");
+	set_value(path, "cpuset.cpus", v);
+	
+out:
+	free(parentpath);
 }
 
-int lxc_cgroup_prepare_attach(const char *name, void **data)
+/* sanity check - make sure that '/init_cgroup/lxc_group' exists */
+static int create_lxcgroups(const char *lxcgroup)
 {
-	struct mntent *mntent;
 	FILE *file = NULL;
-	int err = -1;
-	int found = 0;
-	int *fds;
-	int i;
-	static const int MAXFDS = 256;
+	struct mntent *mntent;
+	int ret, retv = -1;
+	char path[MAXPATHLEN], initpath[MAXPATHLEN];
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -388,202 +484,121 @@ int lxc_cgroup_prepare_attach(const char *name, void **data)
 		return -1;
 	}
 
-	/* create a large enough buffer for all practical
-	 * use cases
-	 */
-	fds = malloc(sizeof(int) * MAXFDS);
-	if (!fds) {
-		err = -1;
-		goto out;
-	}
-	for (i = 0; i < MAXFDS; i++) {
-		fds[i] = -1;
-	}
-
-	err = 0;
-	i = 0;
 	while ((mntent = getmntent(file))) {
-		if (i >= MAXFDS - 1) {
-			ERROR("too many cgroups to attach to, aborting");
-			lxc_cgroup_dispose_attach(fds);
-			errno = ENOMEM;
-			err = -1;
-			goto out;
-		}
-
-		DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
+		char *init = get_init_cgroup(NULL, mntent, initpath, 1);
 
 		if (strcmp(mntent->mnt_type, "cgroup"))
 			continue;
 		if (!mount_has_subsystem(mntent))
 			continue;
 
-		INFO("[%d] found cgroup mounted at '%s',opts='%s'",
-		     ++found, mntent->mnt_dir, mntent->mnt_opts);
-
-		fds[i] = lxc_one_cgroup_prepare_attach(name, mntent);
-		if (fds[i] < 0) {
-			err = fds[i];
-			lxc_cgroup_dispose_attach(fds);
-			goto out;
+		/* 
+		 * TODO - handle case where lxcgroup has subdirs?  (i.e. build/l1)
+		 * May not be worthwhile - remember cgroup depth has perf penalties
+		 * */
+		ret = snprintf(path, MAXPATHLEN, "%s%s/%s",
+			       mntent->mnt_dir, init, lxcgroup ? lxcgroup : "lxc");
+		if (ret < 0 || ret >= MAXPATHLEN)
+			goto fail;
+		if (access(path, F_OK)) {
+			ret = mkdir(path, 0755);
+			if (ret == -1 && errno != EEXIST) {
+				SYSERROR("failed to create '%s' directory", path);
+				goto fail;
+			}
+		} else if (hasmntopt(mntent, "cpuset")) {
+			setup_cpuset(path);
 		}
-		i++;
-	};
-
-	if (!found)
-		ERROR("No cgroup mounted on the system");
-
-	*data = fds;
-
-out:
-	endmntent(file);
-	return err;
-}
-
-/*
- * for each mounted cgroup, attach a pid to the cgroup for the container
- */
-int lxc_cgroup_attach(const char *name, pid_t pid)
-{
-	void *data = NULL;
-	int ret;
-
-	ret = lxc_cgroup_prepare_attach(name, &data);
-	if (ret < 0) {
-		return ret;
 	}
 
-	return lxc_cgroup_finish_attach(data, pid);
+	retv = 0;
+fail:
+	endmntent(file);
+	return retv;
 }
 
 /*
- * rename cgname, which is under cgparent, to a new name starting
- * with 'cgparent/dead'.  That way cgname can be reused.  Return
- * 0 on success, -1 on failure.
+ * XXX This should probably be locked globally
+ * 
+ * Races won't be determintal, you'll just end up with leftover unused cgroups
  */
-int try_to_move_cgname(char *cgparent, char *cgname)
+char *lxc_cgroup_path_create(const char *lxcgroup, const char *name)
 {
-	char *newdir;
+	int i = 0, ret;
+	char *retpath, path[MAXPATHLEN], initpath[MAXPATHLEN], *init;
+	char tail[12];
+	FILE *file = NULL;
+	struct mntent *mntent;
 
-	/* tempnam problems don't matter here - cgroupfs will prevent
-	 * duplicates if we race, and we'll just fail at that (unlikely)
-	 * point
-	 */
+	if (create_lxcgroups(lxcgroup) < 0)
+		return NULL;
 
-	newdir = tempnam(cgparent, "dead");
-	if (!newdir)
-		return -1;
-	if (rename(cgname, newdir))
-		return -1;
-	WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
-		cgname, newdir);
-
-	return 0;
-}
-
-/*
- * create a cgroup for the container in a particular subsystem.
- */
-static int lxc_one_cgroup_create(const char *name,
-				 struct mntent *mntent, pid_t pid)
-{
-	char cginit[MAXPATHLEN], cgname[MAXPATHLEN], cgparent[MAXPATHLEN];
-	char clonechild[MAXPATHLEN];
-	char initcgroup[MAXPATHLEN];
-	int flags, ret;
-
-	/* cgparent is the parent dir, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */
-	/* (remember get_init_cgroup() returns a path starting with '/') */
-	/* cgname is the full name, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc/name */
-	ret = snprintf(cginit, MAXPATHLEN, "%s%s", mntent->mnt_dir,
-		get_init_cgroup(NULL, mntent, initcgroup));
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for init's cgroup (%d)\n", ret);
-		return -1;
+again:
+	file = setmntent(MTAB, "r");
+	if (!file) {
+		SYSERROR("failed to open %s", MTAB);
+		return NULL;
 	}
 
-	flags = get_cgroup_flags(mntent);
-
-	ret = snprintf(cgparent, MAXPATHLEN, "%s%s", cginit,
-		       (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret);
-		return -1;
-	}
-	ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name);
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for cgroup (%d)\n", ret);
-		return -1;
-	}
+	if (i)
+		snprintf(tail, 12, "-%d", i);
+	else
+		*tail = '\0';
 
-	/* Do we have the deprecated ns_cgroup subsystem? */
-	if (flags & CGROUP_NS_CGROUP) {
-		WARN("using deprecated ns_cgroup");
-		return cgroup_rename_nsgroup(cginit, name, pid);
-	}
+	while ((mntent = getmntent(file))) {
 
-	ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
-		       cginit);
-	if (ret < 0 || ret >= MAXPATHLEN) {
-		SYSERROR("Failed creating pathname for clone_children (%d)\n", ret);
-		return -1;
-	}
+		if (strcmp(mntent->mnt_type, "cgroup"))
+			continue;
+		if (!mount_has_subsystem(mntent))
+			continue;
 
-	/* we check if the kernel has clone_children, at this point if there
-	 * no clone_children neither ns_cgroup, that means the cgroup is mounted
-	 * without the ns_cgroup and it has not the compatibility flag
-	 */
-	if (access(clonechild, F_OK)) {
-		ERROR("no ns_cgroup option specified");
-		return -1;
-	}
+		init = get_init_cgroup(NULL, mntent, initpath, 1);
+		/* find unused mnt_dir + init_cgroup + lxcgroup + name + -$i */
+		ret = snprintf(path, MAXPATHLEN, "%s%s/%s/%s%s",
+			       mntent->mnt_dir, init,
+			       lxcgroup ? lxcgroup : "lxc", name, tail);
+		if (ret < 0 || ret >= MAXPATHLEN)
+			goto fail;
 
-	/* enable the clone_children flag of the cgroup */
-	if (cgroup_enable_clone_children(clonechild)) {
-		SYSERROR("failed to enable 'clone_children flag");
-		return -1;
-	}
+		if (access(path, F_OK) == 0) goto next;
 
-	/* if cgparent does not exist, create it */
-	if (access(cgparent, F_OK)) {
-		ret = mkdir(cgparent, 0755);
-		if (ret == -1 && errno != EEXIST) {
-			SYSERROR("failed to create '%s' directory", cgparent);
-			return -1;
+		if (mkdir(path, 0755)) {
+			ERROR("Error creating cgroups");
+			goto fail;
 		}
-	}
-
-	/*
-	 * There is a previous cgroup.  Try to delete it.  If that fails
-	 * (i.e. it is not empty) try to move it out of the way.
-	 */
-	if (!access(cgname, F_OK) && rmdir(cgname)) {
-		if (try_to_move_cgname(cgparent, cgname)) {
-			SYSERROR("failed to remove previous cgroup '%s'", cgname);
-			return -1;
+		if (hasmntopt(mntent, "cpuset")) {
+			setup_cpuset(path);
 		}
 	}
 
-	/* Let's create the cgroup */
-	if (mkdir(cgname, 0755)) {
-		SYSERROR("failed to create '%s' directory", cgname);
-		return -1;
-	}
+	endmntent(file);
 
-	INFO("created cgroup '%s'", cgname);
+	// print out the cgpath part
+	ret = snprintf(path, MAXPATHLEN, "%s/%s%s",
+		       lxcgroup ? lxcgroup : "lxc", name, tail);
+	if (ret < 0 || ret >= MAXPATHLEN) // can't happen
+		goto fail;
 
-	return 0;
+	retpath = strdup(path);
+
+	return retpath;
+
+next:
+	endmntent(file);
+	i++;
+	goto again;
+
+fail:
+	endmntent(file);
+	return NULL;
 }
 
-/*
- * for each mounted cgroup, create a cgroup for the container and attach a pid
- */
-int lxc_cgroup_create(const char *name, pid_t pid)
+int lxc_cgroup_enter(const char *cgpath, pid_t pid)
 {
+	char path[MAXPATHLEN], initpath[MAXPATHLEN], *init;
+	FILE *file = NULL, *fout;
 	struct mntent *mntent;
-	FILE *file = NULL;
-	int err = -1;
-	int found = 0;
+	int ret, retv = -1;
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -592,31 +607,30 @@ int lxc_cgroup_create(const char *name, pid_t pid)
 	}
 
 	while ((mntent = getmntent(file))) {
-		DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
-
 		if (strcmp(mntent->mnt_type, "cgroup"))
 			continue;
 		if (!mount_has_subsystem(mntent))
 			continue;
-
-		INFO("[%d] found cgroup mounted at '%s',opts='%s'",
-		     ++found, mntent->mnt_dir, mntent->mnt_opts);
-
-		err = lxc_one_cgroup_create(name, mntent, pid);
-		if (err)
+		init = get_init_cgroup(NULL, mntent, initpath, 1);
+		ret = snprintf(path, MAXPATHLEN, "%s%s/%s/tasks",
+			       mntent->mnt_dir, init, cgpath);
+		if (ret < 0 || ret >= MAXPATHLEN) {
+			ERROR("entering cgroup");
 			goto out;
-
-		err = lxc_one_cgroup_attach(name, mntent, pid);
-		if (err)
+		}
+		fout = fopen(path, "w");
+		if (!fout) {
+			ERROR("entering cgroup");
 			goto out;
-	};
-
-	if (!found)
-		ERROR("No cgroup mounted on the system");
+		}
+		fprintf(fout, "%d\n", (int)pid);
+		fclose(fout);
+	}
+	retv = 0;
 
 out:
 	endmntent(file);
-	return err;
+	return retv;
 }
 
 int recursive_rmdir(char *dirname)
@@ -664,16 +678,14 @@ int recursive_rmdir(char *dirname)
 
 }
 
-int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
+static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath)
 {
 	char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN];
 	char *cgmnt = mntent->mnt_dir;
-	int flags = get_cgroup_flags(mntent);
 	int rc;
 
-	rc = snprintf(cgname, MAXPATHLEN, "%s%s%s/%s", cgmnt,
-		get_init_cgroup(NULL, mntent, initcgroup),
-		(flags & CGROUP_NS_CGROUP) ? "" : "/lxc", name);
+	rc = snprintf(cgname, MAXPATHLEN, "%s%s/%s", cgmnt,
+		get_init_cgroup(NULL, mntent, initcgroup, 1), cgpath);
 	if (rc < 0 || rc >= MAXPATHLEN) {
 		ERROR("name too long");
 		return -1;
@@ -692,11 +704,11 @@ int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
 /*
  * for each mounted cgroup, destroy the cgroup for the container
  */
-int lxc_cgroup_destroy(const char *name)
+int lxc_cgroup_destroy(const char *cgpath)
 {
 	struct mntent *mntent;
 	FILE *file = NULL;
-	int err = -1;
+	int err, retv  = 0;
 
 	file = setmntent(MTAB, "r");
 	if (!file) {
@@ -710,168 +722,17 @@ int lxc_cgroup_destroy(const char *name)
 		if (!mount_has_subsystem(mntent))
 			continue;
 
-		err = lxc_one_cgroup_destroy(mntent, name);
-		if (err)
-			break;
+		err = lxc_one_cgroup_destroy(mntent, cgpath);
+		if (err)  // keep trying to clean up the others
+			retv = -1;
 	}
 
 	endmntent(file);
-	return err;
+	return retv;
 }
-/*
- * lxc_cgroup_path_get: put into *path the pathname for
- * %subsystem and cgroup %name.  If %subsystem is NULL, then
- * the first mounted cgroup will be used (for nr_tasks)
- */
-int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name)
-{
-	static char        buf[MAXPATHLEN];
-	static char        retbuf[MAXPATHLEN];
-	int rc;
 
-	/* lxc_cgroup_set passes a state object for the subsystem,
-	 * so trim it to just the subsystem part */
-	if (subsystem) {
-		rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
-		if (rc < 0 || rc >= MAXPATHLEN) {
-			ERROR("subsystem name too long");
-			return -1;
-		}
-		char *s = index(retbuf, '.');
-		if (s)
-			*s = '\0';
-		DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, name);
-	}
-	if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
-		ERROR("cgroup is not mounted");
-		return -1;
-	}
-
-	rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, name);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("name too long");
-		return -1;
-	}
-
-	DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem);
-
-	*path = retbuf;
-	return 0;
-}
-
-int lxc_cgroup_set(const char *name, const char *filename, const char *value)
-{
-	int fd, ret;
-	char *dirpath;
-	char path[MAXPATHLEN];
-	int rc;
-
-	ret = lxc_cgroup_path_get(&dirpath, filename, name);
-	if (ret)
-		return -1;
-
-	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		return -1;
-	}
-
-	fd = open(path, O_WRONLY);
-	if (fd < 0) {
-		ERROR("open %s : %s", path, strerror(errno));
-		return -1;
-	}
-
-	ret = write(fd, value, strlen(value));
-	if (ret < 0) {
-		ERROR("write %s : %s", path, strerror(errno));
-		goto out;
-	}
-
-	ret = 0;
-out:
-	close(fd);
-	return ret;
-}
-
-/*
- * If you pass in NULL value or 0 len, then you are asking for the size
- * of the file.  Note that we can't get the file size quickly through stat
- * or lseek.  Therefore if you pass in len > 0 but less than the file size,
- * your only indication will be that the return value will be equal to the
- * passed-in ret.  We will not return the actual full file size.
- */
-int lxc_cgroup_get(const char *name, const char *filename,
-		   char *value, size_t len)
+int lxc_cgroup_attach(const char *name, const char *lxcpath)
 {
-	int fd, ret = -1;
-	char *dirpath;
-	char path[MAXPATHLEN];
-	int rc;
-
-	ret = lxc_cgroup_path_get(&dirpath, filename, name);
-	if (ret)
-		return -1;
-
-	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		return -1;
-	}
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		ERROR("open %s : %s", path, strerror(errno));
-		return -1;
-	}
-
-    if (!len || !value) {
-        char buf[100];
-        int count = 0;
-        while ((ret = read(fd, buf, 100)) > 0)
-            count += ret;
-        if (ret >= 0)
-            ret = count;
-    } else {
-        memset(value, 0, len);
-        ret = read(fd, value, len);
-    }
-
-	if (ret < 0)
-		ERROR("read %s : %s", path, strerror(errno));
-
-	close(fd);
-	return ret;
-}
-
-int lxc_cgroup_nrtasks(const char *name)
-{
-	char *dpath;
-	char path[MAXPATHLEN];
-	int pid, ret, count = 0;
-	FILE *file;
-	int rc;
-
-	ret = lxc_cgroup_path_get(&dpath, NULL, name);
-	if (ret)
-		return -1;
-
-	rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
-	if (rc < 0 || rc >= MAXPATHLEN) {
-		ERROR("pathname too long");
-		return -1;
-	}
-
-	file = fopen(path, "r");
-	if (!file) {
-		SYSERROR("fopen '%s' failed", path);
-		return -1;
-	}
-
-	while (fscanf(file, "%d", &pid) != EOF)
-		count++;
-
-	fclose(file);
-
-	return count;
+	// XXX not yet implemented XXX TODO
+	return -1;
 }
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 8167f39..457f07e 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -26,13 +26,12 @@
 #define MAXPRIOLEN 24
 
 struct lxc_handler;
-extern int lxc_cgroup_create(const char *name, pid_t pid);
-extern int lxc_cgroup_destroy(const char *name);
-extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name);
-extern int lxc_cgroup_nrtasks(const char *name);
-extern int lxc_cgroup_attach(const char *name, pid_t pid);
-extern int lxc_cgroup_prepare_attach(const char *name, void **data);
-extern int lxc_cgroup_finish_attach(void *data, pid_t pid);
-extern int lxc_cgroup_dispose_attach(void *data);
-extern int lxc_ns_is_mounted(void);
+extern int lxc_cgroup_destroy(const char *cgpath);
+extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name,
+			      const char *lxcpath);
+extern int lxc_cgroup_nrtasks(const char *cgpath);
+extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
+extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
+extern int lxc_cgroup_attach(const char *name, const char *lxcpath);
+int cgroup_path_get(char **path, const char *subsystem, const char *cgpath);
 #endif
diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index 83e7df1..de9a7c4 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -84,10 +84,19 @@ static int fill_sock_name(char *path, int len, const char *name,
 static int receive_answer(int sock, struct lxc_answer *answer)
 {
 	int ret;
+	static char answerpath[MAXPATHLEN];
 
 	ret = lxc_af_unix_recv_fd(sock, &answer->fd, answer, sizeof(*answer));
 	if (ret < 0)
 		ERROR("failed to receive answer for the command");
+	if (answer->pathlen == 0)
+		return ret;
+	ret = recv(sock, answerpath, answer->pathlen, 0);
+	if (ret != answer->pathlen) {
+		ERROR("failed to receive answer for the command");
+		ret = 0;
+	} else
+		answer->path = answerpath;
 
 	return ret;
 }
@@ -202,6 +211,7 @@ extern int  lxc_stop_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_state_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_pid_callback(int, struct lxc_request *, struct lxc_handler *);
 extern int  lxc_clone_flags_callback(int, struct lxc_request *, struct lxc_handler *);
+extern int lxc_cgroup_callback(int, struct lxc_request *, struct lxc_handler *);
 
 static int trigger_command(int fd, struct lxc_request *request,
 			   struct lxc_handler *handler)
@@ -214,6 +224,7 @@ static int trigger_command(int fd, struct lxc_request *request,
 		[LXC_COMMAND_STATE]       = lxc_state_callback,
 		[LXC_COMMAND_PID]         = lxc_pid_callback,
 		[LXC_COMMAND_CLONE_FLAGS] = lxc_clone_flags_callback,
+		[LXC_COMMAND_CGROUP]      = lxc_cgroup_callback,
 	};
 
 	if (request->type < 0 || request->type >= LXC_COMMAND_MAX)
diff --git a/src/lxc/commands.h b/src/lxc/commands.h
index 0b72cf1..8d77ce8 100644
--- a/src/lxc/commands.h
+++ b/src/lxc/commands.h
@@ -29,6 +29,7 @@ enum {
 	LXC_COMMAND_STATE,
 	LXC_COMMAND_PID,
 	LXC_COMMAND_CLONE_FLAGS,
+	LXC_COMMAND_CGROUP,
 	LXC_COMMAND_MAX,
 };
 
@@ -41,6 +42,8 @@ struct lxc_answer {
 	int fd;
 	int ret; /* 0 on success, -errno on failure */
 	pid_t pid;
+	int pathlen;
+	const char *path;
 };
 
 struct lxc_command {
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 04ab8b8..1b54e9f 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -1375,7 +1375,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
 	return 0;
 }
 
-int setup_cgroup(const char *name, struct lxc_list *cgroups)
+int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
 {
 	struct lxc_list *iterator;
 	struct lxc_cgroup *cg;
@@ -1388,8 +1388,11 @@ int setup_cgroup(const char *name, struct lxc_list *cgroups)
 
 		cg = iterator->elem;
 
-		if (lxc_cgroup_set(name, cg->subsystem, cg->value))
+		if (lxc_cgroup_set_bypath(cgpath, cg->subsystem, cg->value)) {
+			ERROR("Error setting %s to %s for %s\n", cg->subsystem,
+				cg->value, cgpath);
 			goto out;
+		}
 
 		DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
 	}
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 4c48b46..f20fb2f 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -282,7 +282,7 @@ struct lxc_conf {
 
 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf);
 
-extern int setup_cgroup(const char *name, struct lxc_list *cgroups);
+extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
 extern int detect_shared_rootfs(void);
 
 /*
diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c
index e8b1311..261c2c4 100644
--- a/src/lxc/freezer.c
+++ b/src/lxc/freezer.c
@@ -40,16 +40,11 @@
 
 lxc_log_define(lxc_freezer, lxc);
 
-static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+static int do_unfreeze(const char *nsgroup, int freeze, const char *name, const char *lxcpath)
 {
-	char *nsgroup;
 	char freezer[MAXPATHLEN], *f;
 	char tmpf[32];
 	int fd, ret;
-	
-	ret = lxc_cgroup_path_get(&nsgroup, "freezer", name);
-	if (ret)
-		return -1;
 
 	ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", nsgroup);
 	if (ret >= MAXPATHLEN) {
@@ -59,7 +54,7 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
 
 	fd = open(freezer, O_RDWR);
 	if (fd < 0) {
-		SYSERROR("failed to open freezer for '%s'", name);
+		SYSERROR("failed to open freezer at '%s'", nsgroup);
 		return -1;
 	}
 
@@ -98,7 +93,8 @@ static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
 		ret = strncmp(f, tmpf, strlen(f));
 		if (!ret)
 		{
-			lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
+			if (name)
+				lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
 			break;		/* Success */
 		}
 
@@ -122,6 +118,18 @@ out:
 	return ret;
 }
 
+static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+{
+	char *nsgroup;
+	int ret;
+	
+	ret = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath);
+	if (ret)
+		return -1;
+
+	return do_unfreeze(nsgroup, freeze, name, lxcpath);
+}
+
 int lxc_freeze(const char *name, const char *lxcpath)
 {
 	lxc_monitor_send_state(name, FREEZING, lxcpath);
@@ -133,3 +141,14 @@ int lxc_unfreeze(const char *name, const char *lxcpath)
 	return freeze_unfreeze(name, 0, lxcpath);
 }
 
+int lxc_unfreeze_bypath(const char *cgpath)
+{
+	char *nsgroup;
+	int ret;
+	
+	ret = cgroup_path_get(&nsgroup, "freezer", cgpath);
+	if (ret)
+		return -1;
+
+	return do_unfreeze(nsgroup, 0, NULL, NULL);
+}
diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
index 0fa9480..784b3ca 100644
--- a/src/lxc/lxc.h
+++ b/src/lxc/lxc.h
@@ -118,6 +118,14 @@ extern int lxc_freeze(const char *name, const char *lxcpath);
 extern int lxc_unfreeze(const char *name, const char *lxcpath);
 
 /*
+ * Unfreeze all previously frozen tasks.
+ * This is the function to use from inside the monitor
+ * @name : the name of the container
+ * Return 0 on sucess, < 0 otherwise
+ */
+extern int lxc_unfreeze_bypath(const char *cgpath);
+
+/*
  * Retrieve the container state
  * @name : the name of the container
  * Returns the state of the container on success, < 0 otherwise
@@ -127,24 +135,36 @@ extern lxc_state_t lxc_state(const char *name, const char *lxcpath);
 /*
  * Set a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
+ * @cgpath    : the cgroup path of the container
+ * @filename  : the cgroup attribute filename
+ * @value     : the value to be set
+ * Returns 0 on success, < 0 otherwise
+ */
+extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value);
+
+/*
+ * Set a specified value for a specified subsystem. The specified
+ * subsystem must be fully specified, eg. "cpu.shares"
  * @name      : the name of the container
- * @filename : the cgroup attribute filename
+ * @filename  : the cgroup attribute filename
  * @value     : the value to be set
+ * @lxcpath   : lxc config path for container
  * Returns 0 on success, < 0 otherwise
  */
-extern int lxc_cgroup_set(const char *name, const char *filename, const char *value);
+extern int lxc_cgroup_set(const char *name, const char *filename, const char *value, const char *lxcpath);
 
 /*
  * Get a specified value for a specified subsystem. The specified
  * subsystem must be fully specified, eg. "cpu.shares"
  * @name      : the name of the container
- * @filename : the cgroup attribute filename
+ * @filename  : the cgroup attribute filename
  * @value     : the value to be set
  * @len       : the len of the value variable
+ * @lxcpath   : lxc config path for container
  * Returns the number of bytes read, < 0 on error
  */
 extern int lxc_cgroup_get(const char *name, const char *filename,
-			  char *value, size_t len);
+			  char *value, size_t len, const char *lxcpath);
 
 /*
  * Retrieve the error string associated with the error returned by
diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
index 370bfbc..16680b0 100644
--- a/src/lxc/lxc_attach.c
+++ b/src/lxc/lxc_attach.c
@@ -127,9 +127,9 @@ int main(int argc, char *argv[])
 	struct passwd *passwd;
 	struct lxc_proc_context_info *init_ctx;
 	struct lxc_handler *handler;
-	void *cgroup_data = NULL;
 	uid_t uid;
 	char *curdir;
+	int mypipe[2];
 
 	ret = lxc_caps_init();
 	if (ret)
@@ -156,18 +156,6 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	if (!elevated_privileges) {
-	        /* we have to do this now since /sys/fs/cgroup may not
-	         * be available inside the container or we may not have
-	         * the required permissions anymore
-	         */
-		ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
-		if (ret < 0) {
-			ERROR("failed to prepare attaching to cgroup");
-			return -1;
-		}
-	}
-
 	curdir = getcwd(NULL, 0);
 
 	/* determine which namespaces the container was created with
@@ -183,6 +171,51 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	if (pipe(mypipe)) {
+		SYSERROR("failed creating communications pipe");
+		return -1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		SYSERROR("failed to fork\n");
+		return -1;
+	}
+	if (pid) {
+		int status;
+		int gchild;
+
+		close(mypipe[1]);
+		if (read(mypipe[0], &gchild, sizeof(gchild)) <= 0) {
+			ERROR("failed to get pid from grand-child");
+			return -1;
+		}
+
+		if (!elevated_privileges) {
+			ret = lxc_cgroup_attach(my_args.name, my_args.lxcpath);
+			if (ret < 0) {
+				ERROR("failed to attach process to cgroup");
+				return -1;
+			}
+		}
+
+		close(mypipe[0]);
+	again1:
+		if (waitpid(pid, &status, 0) < 0) {
+			if (errno == EINTR)
+				goto again1;
+			SYSERROR("failed to wait '%d'", pid);
+			return -1;
+		}
+
+		if (WIFEXITED(status))
+			return WEXITSTATUS(status);
+
+		return -1;
+
+		return 0;
+	}
+
 	/* we need to attach before we fork since certain namespaces
 	 * (such as pid namespaces) only really affect children of the
 	 * current process and not the process itself
@@ -224,22 +257,13 @@ int main(int argc, char *argv[])
 		if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
 			return -1;
 
-		/* now that we are done with all privileged operations,
-		 * we can add ourselves to the cgroup. Since we smuggled in
-		 * the fds earlier, we still have write permission
-		 */
-		if (!elevated_privileges) {
-			/* since setns() for pid namespaces only really
-			 * affects child processes, the pid we have is
-			 * still valid outside the container, so this is
-			 * fine
-			 */
-			ret = lxc_cgroup_finish_attach(cgroup_data, pid);
-			if (ret < 0) {
-				ERROR("failed to attach process to cgroup");
-				return -1;
-			}
+		// ask parent to set cgroups for child
+		close(mypipe[0]);
+		if (write(mypipe[1], &pid, sizeof(pid)) != sizeof(pid)) {
+			ERROR("Error writing child's pid to parent");
+			return -1;
 		}
+		close(mypipe[1]);
 
 		/* tell the child we are done initializing */
 		if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
@@ -263,7 +287,8 @@ int main(int argc, char *argv[])
 
 	if (!pid) {
 		lxc_sync_fini_parent(handler);
-		lxc_cgroup_dispose_attach(cgroup_data);
+		close(mypipe[0]);
+		close(mypipe[1]);
 
 		/* A description of the purpose of this functionality is
 		 * provided in the lxc-attach(1) manual page. We have to
@@ -307,6 +332,19 @@ int main(int argc, char *argv[])
 			return -1;
 		}
 
+		if (namespace_flags & CLONE_NEWUSER) {
+			/* XXX FIXME this should get the uid of the container init and setuid to that */
+			/* XXX FIXME or perhaps try to map in the lxc-attach caller's uid? */
+			if (setgid(0)) {
+				SYSERROR("switching to container gid");
+				return -1;
+			}
+			if (setuid(0)) {
+				SYSERROR("switching to container uid");
+				return -1;
+			}
+		}
+
 		uid = getuid();
 
 		passwd = getpwuid(uid);
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 970391b..4a55633 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -78,7 +78,7 @@ int main(int argc, char *argv[])
 		value = my_args.argv[1];
 
 	if (value) {
-		if (lxc_cgroup_set(my_args.name, state_object, value)) {
+		if (lxc_cgroup_set(my_args.name, state_object, value, my_args.lxcpath)) {
 			ERROR("failed to assign '%s' value to '%s' for '%s'",
 				value, state_object, my_args.name);
 			return -1;
@@ -88,7 +88,7 @@ int main(int argc, char *argv[])
 		int ret;
 		char buffer[len];
 
-		ret = lxc_cgroup_get(my_args.name, state_object, buffer, len);
+		ret = lxc_cgroup_get(my_args.name, state_object, buffer, len, my_args.lxcpath);
 		if (ret < 0) {
 			ERROR("failed to retrieve value of '%s' for '%s'",
 			      state_object, my_args.name);
diff --git a/src/lxc/lxc_unshare.c b/src/lxc/lxc_unshare.c
index d92a96a..c5d3332 100644
--- a/src/lxc/lxc_unshare.c
+++ b/src/lxc/lxc_unshare.c
@@ -112,7 +112,6 @@ int main(int argc, char *argv[])
 {
 	int opt, status;
 	int ret;
-	char *pid_name;
 	char *namespaces = NULL;
 	char **args;
 	int flags = 0;
@@ -170,14 +169,5 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	if (lxc_ns_is_mounted()) {
-		if (asprintf(&pid_name, "%d", pid) == -1) {
-			ERROR("pid_name: failed to allocate memory");
-			return -1;
-		}
-		lxc_cgroup_destroy(pid_name);
-		free(pid_name);
-	}
-
 	return  lxc_error_set_and_log(pid, status);
 }
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index f24c39f..78f780d 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -959,7 +959,7 @@ static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys,
 	if (is_stopped_nolock(c))
 		goto err;
 
-	ret = lxc_cgroup_set(c->name, subsys, value);
+	ret = lxc_cgroup_set(c->name, subsys, value, c->config_path);
 	if (!ret)
 		b = true;
 err:
@@ -980,7 +980,7 @@ static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, c
 	if (is_stopped_nolock(c))
 		goto out;
 
-	ret = lxc_cgroup_get(c->name, subsys, retv, inlen);
+	ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path);
 
 out:
 	lxcunlock(c->privlock);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index de431be..f0e82a3 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -268,6 +268,7 @@ int lxc_pid_callback(int fd, struct lxc_request *request,
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.pid = handler->pid;
 	answer.ret = 0;
 
@@ -285,12 +286,49 @@ int lxc_pid_callback(int fd, struct lxc_request *request,
 	return 0;
 }
 
+int lxc_cgroup_callback(int fd, struct lxc_request *request,
+		     struct lxc_handler *handler)
+{
+	struct lxc_answer answer;
+	int ret;
+
+	memset(&answer, 0, sizeof(answer));
+	answer.pathlen = strlen(handler->cgroup) + 1;
+	answer.path = handler->cgroup;
+	answer.ret = 0;
+
+	ret = send(fd, &answer, sizeof(answer), 0);
+	if (ret < 0) {
+		WARN("failed to send answer to the peer");
+		return -1;
+	}
+
+	if (ret != sizeof(answer)) {
+		ERROR("partial answer sent");
+		return -1;
+	}
+
+	ret = send(fd, answer.path, answer.pathlen, 0);
+	if (ret < 0) {
+		WARN("failed to send answer to the peer");
+		return -1;
+	}
+
+	if (ret != answer.pathlen) {
+		ERROR("partial answer sent");
+		return -1;
+	}
+
+	return 0;
+}
+
 int lxc_clone_flags_callback(int fd, struct lxc_request *request,
 			     struct lxc_handler *handler)
 {
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.pid = 0;
 	answer.ret = handler->clone_flags;
 
@@ -467,7 +505,7 @@ out_free:
 	return NULL;
 }
 
-void lxc_fini(const char *name, struct lxc_handler *handler)
+static void lxc_fini(const char *name, struct lxc_handler *handler)
 {
 	/* The STOPPING state is there for future cleanup code
 	 * which can take awhile
@@ -487,6 +525,11 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
 	close(handler->conf->maincmd_fd);
 	handler->conf->maincmd_fd = -1;
 	free(handler->name);
+	if (handler->cgroup) {
+		lxc_cgroup_destroy(handler->cgroup);
+		free(handler->cgroup);
+		handler->cgroup = NULL;
+	}
 	free(handler);
 }
 
@@ -756,7 +799,11 @@ int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
 		failed_before_rename = 1;
 
-	if (lxc_cgroup_create(name, handler->pid))
+	/* TODO - pass lxc.cgroup.dir (or user's pam cgroup) in for first argument */
+	if ((handler->cgroup = lxc_cgroup_path_create(NULL, name)) == NULL)
+		goto out_delete_net;
+	
+	if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0)
 		goto out_delete_net;
 
 	if (failed_before_rename)
@@ -786,7 +833,7 @@ int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
 		goto out_delete_net;
 
-	if (setup_cgroup(name, &handler->conf->cgroup)) {
+	if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) {
 		ERROR("failed to setup the cgroups for '%s'", name);
 		goto out_delete_net;
 	}
@@ -904,7 +951,6 @@ out_fini:
 	lxc_delete_network(handler);
 
 out_fini_nonet:
-	lxc_cgroup_destroy(name);
 	lxc_fini(name, handler);
 	return err;
 
diff --git a/src/lxc/start.h b/src/lxc/start.h
index a0afa87..5b3488f 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -51,6 +51,7 @@ struct lxc_handler {
 #endif
 	int pinfd;
 	const char *lxcpath;
+	char *cgroup;
 };
 
 extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
@@ -58,7 +59,6 @@ extern int lxc_spawn(struct lxc_handler *);
 
 extern int lxc_poll(const char *name, struct lxc_handler *handler);
 extern void lxc_abort(const char *name, struct lxc_handler *handler);
-extern void lxc_fini(const char *name, struct lxc_handler *handler);
 extern int lxc_set_state(const char *, struct lxc_handler *, lxc_state_t);
 extern int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore);
 int __lxc_start(const char *, struct lxc_conf *, struct lxc_operations *,
diff --git a/src/lxc/state.c b/src/lxc/state.c
index 9fcb06d..169aaf5 100644
--- a/src/lxc/state.c
+++ b/src/lxc/state.c
@@ -66,7 +66,7 @@ lxc_state_t lxc_str2state(const char *state)
 	return -1;
 }
 
-static int freezer_state(const char *name)
+static int freezer_state(const char *name, const char *lxcpath)
 {
 	char *nsgroup;
 	char freezer[MAXPATHLEN];
@@ -74,7 +74,7 @@ static int freezer_state(const char *name)
 	FILE *file;
 	int err;
 
-	err = lxc_cgroup_path_get(&nsgroup, "freezer", name);
+	err = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath);
 	if (err)
 		return -1;
 
@@ -132,7 +132,7 @@ static lxc_state_t __lxc_getstate(const char *name, const char *lxcpath)
 
 lxc_state_t lxc_getstate(const char *name, const char *lxcpath)
 {
-	int state = freezer_state(name);
+	int state = freezer_state(name, lxcpath);
 	if (state != FROZEN && state != FREEZING)
 		state = __lxc_getstate(name, lxcpath);
 	return state;
@@ -148,6 +148,7 @@ extern int lxc_state_callback(int fd, struct lxc_request *request,
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.ret = handler->state;
 
 	ret = send(fd, &answer, sizeof(answer), 0);
diff --git a/src/lxc/stop.c b/src/lxc/stop.c
index c796283..851a4bf 100644
--- a/src/lxc/stop.c
+++ b/src/lxc/stop.c
@@ -83,9 +83,10 @@ extern int lxc_stop_callback(int fd, struct lxc_request *request,
 	struct lxc_answer answer;
 	int ret;
 
+	memset(&answer, 0, sizeof(answer));
 	answer.ret = kill(handler->pid, SIGKILL);
 	if (!answer.ret) {
-		ret = lxc_unfreeze(handler->name, handler->lxcpath);
+		ret = lxc_unfreeze_bypath(handler->cgroup);
 		if (!ret)
 			return 0;
 
diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index a883d36..4e5228a 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -16,6 +16,7 @@ lxc_test_shutdowntest_SOURCES = shutdowntest.c
 lxc_test_get_item_SOURCES = get_item.c
 lxc_test_getkeys_SOURCES = getkeys.c
 lxc_test_lxcpath_SOURCES = lxcpath.c
+lxc_test_cgpath_SOURCES = cgpath.c
 
 AM_CFLAGS=-I$(top_srcdir)/src \
 	-DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
@@ -25,6 +26,7 @@ AM_CFLAGS=-I$(top_srcdir)/src \
 
 bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \
 	lxc-test-destroytest lxc-test-saveconfig lxc-test-createtest \
-	lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys lxc-test-lxcpath
+	lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys lxc-test-lxcpath \
+	lxc-test-cgpath
 
 endif
diff --git a/src/tests/cgpath.c b/src/tests/cgpath.c
new file mode 100644
index 0000000..43a1513
--- /dev/null
+++ b/src/tests/cgpath.c
@@ -0,0 +1,142 @@
+/* liblxcapi
+ *
+ * Copyright © 2012 Serge Hallyn <serge.hallyn at ubuntu.com>.
+ * Copyright © 2012 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include "../lxc/lxccontainer.h"
+
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "../lxc/cgroup.h"
+#include "../lxc/lxc.h"
+
+#define MYNAME "lxctest1"
+#define MYNAME2 "lxctest2"
+
+#define TSTERR(x) do { \
+	fprintf(stderr, "%d: %s\n", __LINE__, x); \
+} while (0)
+
+int main()
+{
+	struct lxc_container *c = NULL, *c2 = NULL;
+	char *path;
+	int len;
+	int ret, retv = -1;
+
+	/* won't require privilege necessarily once users are classified by
+	 * pam_cgroup */
+	if (geteuid() != 0) {
+		TSTERR("requires privilege");
+		exit(0);
+	}
+
+	path = lxc_cgroup_path_create(NULL, MYNAME);
+	len = strlen(path);
+	if (!path || !len) {
+		TSTERR("zero result from lxc_cgroup_path_create");
+		exit(1);
+	}
+	if (strstr(path, "lxc/" MYNAME)) {
+		TSTERR("lxc_cgroup_path_create NULL lxctest1");
+		exit(1);
+	}
+	free(path);
+
+	path = lxc_cgroup_path_create("ab", MYNAME);
+	len = strlen(path);
+	if (!path || !len) {
+		TSTERR("zero result from lxc_cgroup_path_create");
+		exit(1);
+	}
+	if (strstr(path, "ab/" MYNAME)) {
+		TSTERR("lxc_cgroup_path_create ab lxctest1");
+		exit(1);
+	}
+	free(path);
+
+	if ((c = lxc_container_new(MYNAME, NULL)) == NULL) {
+		TSTERR("instantiating first container");
+		exit(1);
+	}
+	c->set_config_item(c, "lxc.network.type", "empty");
+	if (!c->createl(c, "ubuntu", NULL)) {
+		TSTERR("creating first container");
+		exit(1);
+	}
+	c->load_config(c, NULL);
+	c->want_daemonize(c);
+	if (!c->startl(c, 0, NULL)) {
+		TSTERR("starting first container");
+		exit(1);
+	}
+
+	char *nsgroup;
+#define ALTBASE "/var/lib/lxctest2"
+	ret = mkdir(ALTBASE, 755);
+
+	ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME, c->get_config_path(c));
+	if (ret < 0 || strstr(nsgroup, "lxc/" MYNAME)) {
+		TSTERR("getting first cgroup path from lxc_command");
+		exit(1);
+	}
+
+	/* start second container */
+	if ((c2 = lxc_container_new(MYNAME2, ALTBASE)) == NULL) {
+		TSTERR("instantiating first container");
+		exit(1);
+	}
+	c2->set_config_item(c2, "lxc.network.type", "empty");
+	if (!c2->createl(c2, "ubuntu", NULL)) {
+		TSTERR("creating first container");
+		exit(1);
+	}
+
+	c2->load_config(c2, NULL);
+	c2->want_daemonize(c2);
+	if (!c2->startl(c2, 0, NULL)) {
+		TSTERR("starting first container");
+		goto out;
+	}
+
+	ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME2, c2->get_config_path(c2));
+	if (ret < 0 || strstr(nsgroup, "lxc/" MYNAME2)) {
+		TSTERR("getting second cgroup path from lxc_command");
+		exit(1);
+	}
+
+	if (lxc_cgroup_nrtasks(nsgroup) < 1) {
+		TSTERR("getting nrtasks");
+		exit(1);
+	}
+
+	retv = 0;
+out:
+	if (c2) {
+		c2->stop(c2);
+		c2->destroy(c2);
+	}
+	if (c) {
+		c->stop(c);
+		c->destroy(c);
+	}
+	return retv;
+}
diff --git a/src/tests/lxcpath.c b/src/tests/lxcpath.c
index 92525ed..a849a38 100644
--- a/src/tests/lxcpath.c
+++ b/src/tests/lxcpath.c
@@ -29,7 +29,7 @@
 #define MYNAME "lxctest1"
 
 #define TSTERR(x) do { \
-	fprintf(stderr, "%d: %s", __LINE__, x); \
+	fprintf(stderr, "%d: %s\n", __LINE__, x); \
 } while (0)
 
 int main()
-- 
1.8.1.2





More information about the lxc-devel mailing list