[lxc-devel] [PATCH 1/2] move cgroup.c to cgfs.c in preparation for backend change

Dwight Engen dwight.engen at oracle.com
Wed Feb 5 21:59:16 UTC 2014


This is just a move without any changes so history will be preserved.
Makefile.am was modified so that lxc will still build and run.

Signed-off-by: Dwight Engen <dwight.engen at oracle.com>
---
 src/lxc/Makefile.am |    2 +-
 src/lxc/cgfs.c      | 2461 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/cgroup.c    | 2461 ---------------------------------------------------
 3 files changed, 2462 insertions(+), 2462 deletions(-)
 create mode 100644 src/lxc/cgfs.c
 delete mode 100644 src/lxc/cgroup.c

diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index 83c0585..b4aa924 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -62,7 +62,7 @@ liblxc_so_SOURCES = \
 	freezer.c \
 	error.h error.c \
 	parse.c parse.h \
-	cgroup.c cgroup.h \
+	cgfs.c cgroup.h \
 	lxc.h \
 	utils.c utils.h \
 	sync.c sync.h \
diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c
new file mode 100644
index 0000000..c23b784
--- /dev/null
+++ b/src/lxc/cgfs.c
@@ -0,0 +1,2461 @@
+/*
+ * lxc: linux Container library
+ *
+ * (C) Copyright IBM Corp. 2007, 2008
+ *
+ * Authors:
+ * Daniel Lezcano <daniel.lezcano at free.fr>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <sys/inotify.h>
+#include <sys/mount.h>
+#include <netinet/in.h>
+#include <net/if.h>
+
+#include "error.h"
+#include "commands.h"
+#include "list.h"
+#include "conf.h"
+#include "utils.h"
+#include "bdev.h"
+#include "log.h"
+#include "cgroup.h"
+#include "start.h"
+#include "state.h"
+
+#if IS_BIONIC
+#include <../include/lxcmntent.h>
+#else
+#include <mntent.h>
+#endif
+
+lxc_log_define(lxc_cgroup, lxc);
+
+/* Prototypes for the file-local (static) helpers referenced by the code below. */
+static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
+static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
+static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
+static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
+static bool is_valid_cgroup(const char *name);
+static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
+static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
+static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
+static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
+static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
+static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
+static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
+static int do_setup_cgroup_limits(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
+static int cgroup_recursive_task_count(const char *cgroup_path);
+static int count_lines(const char *fn);
+static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
+static bool init_cpuset_if_needed(struct cgroup_mount_point *mp, const char *path);
+
+/* Operations table of this backend; active_cg_ops initially points at it. */
+static struct cgroup_ops cgfs_ops;
+struct cgroup_ops *active_cg_ops = &cgfs_ops;
+static void init_cg_ops(void);
+
+#ifdef HAVE_CGMANAGER
+/* this needs to be mutexed for api use */
+extern bool cgmanager_initialized;
+extern bool use_cgmanager;
+extern bool lxc_init_cgmanager(void);
+#else
+/* Built without HAVE_CGMANAGER: file-local stand-ins. Both flags start out
+ * false and the init stub always returns false.
+ */
+static bool cgmanager_initialized = false;
+static bool use_cgmanager = false;
+static bool lxc_init_cgmanager(void) { return false; }
+#endif
+
+/*
+ * Recursively remove the directory 'dirname'.
+ *
+ * Every sub-directory found below 'dirname' is removed first (depth-first),
+ * then rmdir() is called on 'dirname' itself. Entries that are not
+ * directories are not unlinked here.
+ *
+ * The walk continues after a failure so that as much as possible gets
+ * removed. Returns 0 if every step succeeded and -1 otherwise. On return
+ * errno holds the first error that was recorded (0 if none was).
+ */
+static int cgroup_rmdir(char *dirname)
+{
+	struct dirent dirent, *direntp;
+	int saved_errno = 0;
+	DIR *dir;
+	int ret, failed=0;
+	char pathname[MAXPATHLEN];
+
+	dir = opendir(dirname);
+	if (!dir) {
+		ERROR("%s: failed to open %s", __func__, dirname);
+		return -1;
+	}
+
+	while (!readdir_r(dir, &dirent, &direntp)) {
+		struct stat mystat;
+		int rc;
+
+		if (!direntp)
+			break;
+
+		if (!strcmp(direntp->d_name, ".") ||
+		    !strcmp(direntp->d_name, ".."))
+			continue;
+
+		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
+		if (rc < 0 || rc >= MAXPATHLEN) {
+			ERROR("pathname too long");
+			failed=1;
+			/* errno values are positive; the joined path did not
+			 * fit into the buffer, so report exactly that
+			 */
+			if (!saved_errno)
+				saved_errno = ENAMETOOLONG;
+			continue;
+		}
+		ret = lstat(pathname, &mystat);
+		if (ret) {
+			SYSERROR("%s: failed to stat %s", __func__, pathname);
+			failed=1;
+			if (!saved_errno)
+				saved_errno = errno;
+			continue;
+		}
+		if (S_ISDIR(mystat.st_mode)) {
+			if (cgroup_rmdir(pathname) < 0) {
+				if (!saved_errno)
+					saved_errno = errno;
+				failed=1;
+			}
+		}
+	}
+
+	if (rmdir(dirname) < 0) {
+		SYSERROR("%s: failed to delete %s", __func__, dirname);
+		if (!saved_errno)
+			saved_errno = errno;
+		failed=1;
+	}
+
+	ret = closedir(dir);
+	if (ret) {
+		SYSERROR("%s: failed to close directory %s", __func__, dirname);
+		if (!saved_errno)
+			saved_errno = errno;
+		failed=1;
+	}
+
+	/* hand the first recorded error back to the caller */
+	errno = saved_errno;
+	return failed ? -1 : 0;
+}
+
+/*
+ * Load the cgroup meta data, restricted to the subsystems named in the
+ * global configuration key "lxc.cgroup.use" (a comma separated list).
+ * If the key has no value, lxc_cgroup_load_meta2() is called with a NULL
+ * whitelist.
+ *
+ * Returns the meta data object or NULL on failure; errno as left by the
+ * failing step is preserved across the cleanup.
+ */
+struct cgroup_meta_data *lxc_cgroup_load_meta()
+{
+	struct cgroup_meta_data *meta;
+	char **whitelist = NULL;
+	const char *configured;
+	int err;
+
+	/* a NULL value is only an error if the lookup also set errno */
+	errno = 0;
+	configured = lxc_global_config_value("lxc.cgroup.use");
+	if (configured) {
+		whitelist = lxc_string_split_and_trim(configured, ',');
+		if (!whitelist)
+			return NULL;
+	} else if (errno != 0) {
+		return NULL;
+	}
+
+	meta = lxc_cgroup_load_meta2((const char **)whitelist);
+
+	/* freeing the list must not clobber the errno of the load */
+	err = errno;
+	lxc_free_array((void **)whitelist, free);
+	errno = err;
+
+	return meta;
+}
+
+/* Step 1: determine all kernel subsystems
+ *
+ * Reads /proc/cgroups and appends a copy of the first (tab separated)
+ * field of every usable line to the array at *kernel_subsystems. Lines
+ * starting with '#', empty lines and lines whose second field is not a
+ * plain decimal number are skipped.
+ *
+ * Returns true when the whole file was processed, false if it could not
+ * be opened or an allocation failed. Entries appended before a failure
+ * stay in the array; this function does not free them.
+ */
+bool find_cgroup_subsystems(char ***kernel_subsystems)
+{
+	FILE *fp;
+	char *buf = NULL;
+	size_t buflen = 0;
+	size_t count = 0;
+	size_t capacity = 0;
+	bool ok = false;
+
+	fp = fopen_cloexec("/proc/cgroups", "r");
+	if (!fp)
+		return false;
+
+	while (getline(&buf, &buflen, fp) != -1) {
+		char *number;
+		char *number_end;
+		char *parse_end = NULL;
+		char *copy;
+
+		if (buf[0] == '#' || buf[0] == '\0')
+			continue;
+
+		/* terminate the name at the first tab ... */
+		number = strchr(buf, '\t');
+		if (!number)
+			continue;
+		*number++ = '\0';
+
+		/* ... and the hierarchy number at the second one */
+		number_end = strchr(number, '\t');
+		if (!number_end)
+			continue;
+		*number_end = '\0';
+
+		/* only the syntax is validated, the value itself is unused */
+		(void)strtoul(number, &parse_end, 10);
+		if (!parse_end || *parse_end != '\0')
+			continue;
+
+		if (lxc_grow_array((void ***)kernel_subsystems, &capacity, count + 1, 12) < 0)
+			goto out;
+
+		copy = strdup(buf);
+		(*kernel_subsystems)[count] = copy;
+		if (!copy)
+			goto out;
+		count++;
+	}
+	ok = true;
+
+out:
+	fclose(fp);
+	free(buf);
+	return ok;
+}
+
+/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
+ *         since mount points don't specify hierarchy number and
+ *         /proc/cgroups does not contain named hierarchies
+ *
+ * For every "hierarchy:subsystems:group" line a struct cgroup_hierarchy is
+ * allocated and stored in meta_data->hierarchies[hierarchy number];
+ * meta_data->maximum_hierarchy tracks the highest number seen.
+ *
+ * A hierarchy is flagged as 'used' when both all_kernel_subsystems and
+ * all_named_subsystems are set, or when one of its subsystems is selected:
+ * "name=" subsystems by all_named_subsystems, all others by
+ * all_kernel_subsystems, either kind by being listed in subsystem_whitelist
+ * (which may be NULL).
+ *
+ * Returns true if the file was parsed completely. Returns false if neither
+ * /proc/self/cgroup nor /proc/1/cgroup could be opened, on allocation
+ * failure, or if a hierarchy number shows up twice. Objects stored in
+ * meta_data before a failure are left there for the caller to release.
+ */
+static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
+	bool all_kernel_subsystems, bool all_named_subsystems,
+	const char **subsystem_whitelist)
+{
+	FILE *proc_self_cgroup;
+	char *line = NULL;
+	size_t sz = 0;
+	int r;
+	bool bret = false;
+	size_t hierarchy_capacity = 0;
+
+	proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
+	/* if for some reason (because of setns() and pid namespace for example),
+	 * /proc/self is not valid, we try /proc/1/cgroup... */
+	if (!proc_self_cgroup)
+		proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
+	if (!proc_self_cgroup)
+		return false;
+
+	while (getline(&line, &sz, proc_self_cgroup) != -1) {
+		/* file format: hierarchy:subsystems:group,
+		 * we only extract hierarchy and subsystems
+		 * here */
+		char *colon1;
+		char *colon2;
+		int hierarchy_number;
+		struct cgroup_hierarchy *h = NULL;
+		char **p;
+
+		if (!line[0])
+			continue;
+
+		colon1 = strchr(line, ':');
+		if (!colon1)
+			continue;
+		*colon1++ = '\0';
+		colon2 = strchr(colon1, ':');
+		if (!colon2)
+			continue;
+		*colon2 = '\0';
+
+		colon2 = NULL;
+		hierarchy_number = strtoul(line, &colon2, 10);
+		if (!colon2 || *colon2)
+			continue;
+		/* a number that does not fit into an int would end up as a
+		 * negative array index below
+		 */
+		if (hierarchy_number < 0)
+			continue;
+
+		/* The array starts out unallocated with maximum_hierarchy == 0,
+		 * so a first entry with hierarchy number 0 would not pass the
+		 * '>' test; make sure the array exists before it is indexed.
+		 */
+		if (!meta_data->hierarchies || hierarchy_number > meta_data->maximum_hierarchy) {
+			/* lxc_grow_array will never shrink, so even if we find a lower
+			 * hierarchy number here, the array will never be smaller
+			 */
+			r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
+			if (r < 0)
+				goto out;
+
+			meta_data->maximum_hierarchy = hierarchy_number;
+		}
+
+		/* this shouldn't happen, we had this already */
+		if (meta_data->hierarchies[hierarchy_number])
+			goto out;
+
+		h = calloc(1, sizeof(struct cgroup_hierarchy));
+		if (!h)
+			goto out;
+
+		/* store it right away so the owner of meta_data can free it
+		 * even if we bail out below
+		 */
+		meta_data->hierarchies[hierarchy_number] = h;
+
+		h->index = hierarchy_number;
+		h->subsystems = lxc_string_split_and_trim(colon1, ',');
+		if (!h->subsystems)
+			goto out;
+		/* see if this hierarchy should be considered */
+		if (!all_kernel_subsystems || !all_named_subsystems) {
+			for (p = h->subsystems; *p; p++) {
+				if (!strncmp(*p, "name=", 5)) {
+					if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
+						h->used = true;
+						break;
+					}
+				} else {
+					if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
+						h->used = true;
+						break;
+					}
+				}
+			}
+		} else {
+			/* we want all hierarchy anyway */
+			h->used = true;
+		}
+	}
+	bret = true;
+
+out:
+	fclose(proc_self_cgroup);
+	free(line);
+	return bret;
+}
+
+/* Step 3: determine all mount points of each hierarchy
+ *
+ * Scans /proc/self/mountinfo (falling back to /proc/1/mountinfo) for
+ * mounts of type "cgroup". Each such mount is matched to one of the
+ * hierarchies in meta_data by comparing the hierarchy's first subsystem
+ * against the subsystems derived from the mount's per-superblock options.
+ * A matched mount is recorded in meta_data->mount_points and in the
+ * hierarchy's all_mount_points; the first read-only and the first writable
+ * mount with mount prefix "/" are additionally remembered in the
+ * hierarchy's ro_/rw_absolute_mount_point.
+ *
+ * Mounts that match no known hierarchy are skipped.
+ *
+ * Returns true if the whole file was processed, false if it could not be
+ * opened or an allocation failed. Objects stored in meta_data before a
+ * failure are left there for the caller to release.
+ */
+static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
+{
+	bool bret = false;
+	FILE *proc_self_mountinfo;
+	char *line = NULL;
+	size_t sz = 0;
+	char **tokens = NULL;
+	size_t mount_point_count = 0;
+	size_t mount_point_capacity = 0;
+	size_t token_capacity = 0;
+	int r;
+
+	proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
+	/* if for some reason (because of setns() and pid namespace for example),
+	 * /proc/self is not valid, we try /proc/1/mountinfo... */
+	if (!proc_self_mountinfo)
+		proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
+	if (!proc_self_mountinfo)
+		return false;
+
+	while (getline(&line, &sz, proc_self_mountinfo) != -1) {
+		char *token, *line_tok, *saveptr = NULL;
+		size_t i, j, k;
+		struct cgroup_mount_point *mount_point;
+		struct cgroup_hierarchy *h;
+		char **subsystems;
+
+		if (line[0] && line[strlen(line) - 1] == '\n')
+			line[strlen(line) - 1] = '\0';
+
+		/* tokens[] only holds pointers into 'line'; entries at index
+		 * >= i are stale leftovers from earlier lines
+		 */
+		for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
+			r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
+			if (r < 0)
+				goto out;
+			tokens[i++] = token;
+		}
+
+		/* layout of /proc/self/mountinfo:
+		 *      0: id
+		 *      1: parent id
+		 *      2: device major:minor
+		 *      3: mount prefix
+		 *      4: mount point
+		 *      5: per-mount options
+		 *    [optional X]: additional data
+		 *    X+7: "-"
+		 *    X+8: type
+		 *    X+9: source
+		 *    X+10: per-superblock options
+		 */
+		for (j = 6; j < i && tokens[j]; j++)
+			if (!strcmp(tokens[j], "-"))
+				break;
+
+		/* could not find separator */
+		if (j >= i || !tokens[j])
+			continue;
+		/* there should be exactly three fields after
+		 * the separator
+		 */
+		if (i != j + 4)
+			continue;
+
+		/* not a cgroup filesystem */
+		if (strcmp(tokens[j + 1], "cgroup") != 0)
+			continue;
+
+		subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
+		if (!subsystems)
+			goto out;
+
+		h = NULL;
+		for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
+			if (meta_data->hierarchies[k] &&
+			    meta_data->hierarchies[k]->subsystems[0] &&
+			    lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
+				/* TODO: we could also check if the lists really match completely,
+				 *       just to have an additional sanity check */
+				h = meta_data->hierarchies[k];
+				break;
+			}
+		}
+		lxc_free_array((void **)subsystems, free);
+
+		/* A cgroup mount that belongs to none of the hierarchies we
+		 * know about cannot be attached anywhere; skip it instead of
+		 * dereferencing a NULL hierarchy below.
+		 */
+		if (!h)
+			continue;
+
+		r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
+		if (r < 0)
+			goto out;
+
+		/* create mount point object */
+		mount_point = calloc(1, sizeof(*mount_point));
+		if (!mount_point)
+			goto out;
+
+		/* store it right away so the owner of meta_data can free it
+		 * even if we bail out below
+		 */
+		meta_data->mount_points[mount_point_count++] = mount_point;
+
+		mount_point->hierarchy = h;
+		mount_point->mount_point = strdup(tokens[4]);
+		mount_point->mount_prefix = strdup(tokens[3]);
+		if (!mount_point->mount_point || !mount_point->mount_prefix)
+			goto out;
+		mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
+
+		if (!strcmp(mount_point->mount_prefix, "/")) {
+			if (mount_point->read_only) {
+				if (!h->ro_absolute_mount_point)
+					h->ro_absolute_mount_point = mount_point;
+			} else {
+				if (!h->rw_absolute_mount_point)
+					h->rw_absolute_mount_point = mount_point;
+			}
+		}
+
+		k = lxc_array_len((void **)h->all_mount_points);
+		r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
+		if (r < 0)
+			goto out;
+		h->all_mount_points[k] = mount_point;
+	}
+	bret = true;
+
+out:
+	fclose(proc_self_mountinfo);
+	free(tokens);
+	free(line);
+	return bret;
+}
+
+/*
+ * Build a fresh cgroup meta data object (reference count 1) by running the
+ * three discovery steps: kernel subsystems, hierarchies, mount points.
+ *
+ * subsystem_whitelist may be NULL. Without a whitelist all hierarchies
+ * containing kernel subsystems are selected and purely named hierarchies
+ * are not. With a whitelist, "@kernel" selects all hierarchies with kernel
+ * subsystems, "@named" all named hierarchies and "@all" both.
+ *
+ * Returns the object, or NULL on failure with errno preserved across the
+ * cleanup (EINVAL if no hierarchy or no mount point was found).
+ */
+struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
+{
+	struct cgroup_meta_data *md;
+	char **kernel_list = NULL;
+	bool want_kernel = true;
+	bool want_named = false;
+	int err;
+
+	if (subsystem_whitelist) {
+		want_kernel = lxc_string_in_array("@kernel", subsystem_whitelist) ||
+			      lxc_string_in_array("@all", subsystem_whitelist);
+		want_named = lxc_string_in_array("@named", subsystem_whitelist) ||
+			     lxc_string_in_array("@all", subsystem_whitelist);
+	}
+
+	md = calloc(1, sizeof(*md));
+	if (!md)
+		return NULL;
+	md->ref = 1;
+
+	/* the steps depend on each other, so stop at the first failure */
+	if (!find_cgroup_subsystems(&kernel_list) ||
+	    !find_cgroup_hierarchies(md, want_kernel, want_named, subsystem_whitelist) ||
+	    !find_hierarchy_mountpts(md, kernel_list))
+		goto fail;
+
+	/* oops, we couldn't find anything */
+	if (!md->hierarchies || !md->mount_points) {
+		errno = EINVAL;
+		goto fail;
+	}
+
+	lxc_free_array((void **)kernel_list, free);
+	return md;
+
+fail:
+	err = errno;
+	lxc_free_array((void **)kernel_list, free);
+	lxc_cgroup_put_meta(md);
+	errno = err;
+	return NULL;
+}
+
+/* Take an additional reference on meta_data and hand the same object back. */
+struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
+{
+	meta_data->ref += 1;
+
+	return meta_data;
+}
+
+/*
+ * Drop one reference on meta_data (NULL is accepted and ignored).
+ *
+ * While references remain, the object is returned unchanged. When the last
+ * one is dropped, all mount points, all hierarchies and the object itself
+ * are freed and NULL is returned.
+ */
+struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
+{
+	if (!meta_data)
+		return NULL;
+
+	meta_data->ref--;
+	if (meta_data->ref > 0)
+		return meta_data;
+
+	lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
+
+	if (meta_data->hierarchies) {
+		size_t idx = 0;
+
+		/* the array is indexed by hierarchy number, so walk every
+		 * slot up to and including the highest number
+		 */
+		while (idx <= meta_data->maximum_hierarchy) {
+			lxc_cgroup_hierarchy_free(meta_data->hierarchies[idx]);
+			idx++;
+		}
+	}
+
+	free(meta_data->hierarchies);
+	free(meta_data);
+	return NULL;
+}
+
+/*
+ * Return the first hierarchy (in order of hierarchy number) whose subsystem
+ * list contains 'subsystem', or NULL if there is none.
+ */
+struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
+{
+	size_t idx = 0;
+
+	while (idx <= meta_data->maximum_hierarchy) {
+		struct cgroup_hierarchy *candidate = meta_data->hierarchies[idx];
+
+		idx++;
+		/* slots of hierarchy numbers that were never seen are NULL */
+		if (!candidate)
+			continue;
+		if (lxc_string_in_array(subsystem, (const char **)candidate->subsystems))
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Pick the mount point of 'hierarchy' through which cgroup 'group' can be
+ * reached.
+ *
+ * A writable mount of the hierarchy root is always preferred; a read-only
+ * mount of the root is used if the caller does not need write access.
+ * Otherwise every mount point is considered whose mount prefix is empty,
+ * "/" or a path prefix of 'group', and the one with the shortest prefix
+ * wins. Read-only mounts are ignored if should_be_writable is set.
+ *
+ * Returns the mount point, or NULL with errno set to ENOENT.
+ */
+struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
+{
+	struct cgroup_mount_point **cursor;
+	struct cgroup_mount_point *best = NULL;
+	ssize_t best_len = -1;
+
+	/* trivial case */
+	if (hierarchy->rw_absolute_mount_point)
+		return hierarchy->rw_absolute_mount_point;
+	if (hierarchy->ro_absolute_mount_point && !should_be_writable)
+		return hierarchy->ro_absolute_mount_point;
+
+	for (cursor = hierarchy->all_mount_points; cursor && *cursor; cursor++) {
+		struct cgroup_mount_point *candidate = *cursor;
+		size_t plen = 0;
+
+		if (candidate->mount_prefix)
+			plen = strlen(candidate->mount_prefix);
+		/* a prefix of "/" is the same as no prefix at all */
+		if (plen == 1 && candidate->mount_prefix[0] == '/')
+			plen = 0;
+
+		if (candidate->read_only && should_be_writable)
+			continue;
+
+		if (plen) {
+			/* the prefix has to match whole path components */
+			if (strncmp(group, candidate->mount_prefix, plen) != 0)
+				continue;
+			if (group[plen] != '\0' && group[plen] != '/')
+				continue;
+		}
+
+		/* keep the match with the shortest prefix seen so far */
+		if (best_len == -1 || plen < best_len) {
+			best = candidate;
+			best_len = plen;
+		}
+	}
+
+	if (!best)
+		errno = ENOENT;
+	return best;
+}
+
+/*
+ * Resolve cgroup 'group' of 'subsystem' to an absolute path, using freshly
+ * loaded meta data that is released again before returning.
+ *
+ * 'suffix' is passed on unchanged to cgroup_to_absolute_path().
+ *
+ * Returns the path produced by cgroup_to_absolute_path(), or NULL if the
+ * meta data, the hierarchy, a suitable mount point or the path could not be
+ * obtained; errno of the failing step is preserved.
+ */
+char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
+{
+	struct cgroup_meta_data *meta_data;
+	struct cgroup_hierarchy *h;
+	struct cgroup_mount_point *mp;
+	char *abs_path = NULL;
+	int err;
+
+	meta_data = lxc_cgroup_load_meta();
+	if (!meta_data)
+		return NULL;
+
+	h = lxc_cgroup_find_hierarchy(meta_data, subsystem);
+	if (h) {
+		mp = lxc_cgroup_find_mount_point(h, group, should_be_writable);
+		if (mp)
+			abs_path = cgroup_to_absolute_path(mp, group, suffix);
+	}
+
+	if (!abs_path) {
+		/* dropping the meta data must not hide the real error */
+		err = errno;
+		lxc_cgroup_put_meta(meta_data);
+		errno = err;
+		return NULL;
+	}
+
+	lxc_cgroup_put_meta(meta_data);
+	return abs_path;
+}
+
+/*
+ * Look up the cgroup membership of process 'pid' by handing the path
+ * /proc/<pid>/cgroup to lxc_cgroup_process_info_getx().
+ */
+struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
+{
+	char proc_path[32];
+
+	snprintf(proc_path, sizeof(proc_path), "/proc/%lu/cgroup", (unsigned long)pid);
+
+	return lxc_cgroup_process_info_getx(proc_path, meta);
+}
+
+/* Look up the cgroup membership of the process with pid 1. */
+struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
+{
+	const pid_t init_pid = 1;
+
+	return lxc_cgroup_process_info_get(init_pid, meta);
+}
+
+/*
+ * Look up the cgroup membership of the calling process. /proc/self/cgroup
+ * is tried first; if that yields nothing, the lookup is repeated through
+ * /proc/<getpid()>/cgroup.
+ */
+struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
+{
+	struct cgroup_process_info *info;
+
+	info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
+	if (info)
+		return info;
+
+	return lxc_cgroup_process_info_get(getpid(), meta);
+}
+
+/*
+ * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
+ * is already in a new cgroup named after the pid.  Rename that cgroup so that
+ * it is named after the container instead.
+ *
+ * mountpath: mount point of the hierarchy, e.g. /sys/fs/cgroup
+ * oldname:   cgroup (relative to mountpath) that contains the pid-named
+ *            cgroup, e.g. /lxc
+ * pid:       pid the existing cgroup is named after, e.g. 2975
+ * name:      container name, e.g. c1
+ *
+ * With the example values, /sys/fs/cgroup/lxc/2975 is renamed to
+ * /sys/fs/cgroup/lxc/c1. An already existing target is removed with rmdir()
+ * first; if that fails the rename is not attempted.
+ *
+ * Returns the new cgroup name relative to the mount point (/lxc/c1 in the
+ * example) as a malloc()ed string the caller has to free, or NULL on error.
+ */
+static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
+{
+	char *fulloldpath;
+	char *newname, *fullnewpath;
+	int len, newlen, ret;
+
+	/*
+	 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
+	 * name is c1,
+	 * fulloldpath = /cgroup/ab/2375
+	 * fullnewpath = /cgroup/ab/c1
+	 * newname = /ab/c1
+	 */
+
+	/* 22 = two '/', up to 19 characters for the pid, terminating NUL */
+	len = strlen(oldname) + strlen(mountpath) + 22;
+	fulloldpath = alloca(len);
+	/* the argument is an unsigned long, so the conversion must be %lu */
+	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
+	if (ret < 0 || ret >= len)
+		return NULL;
+
+	len = strlen(oldname) + strlen(name) + 2;
+	newname = malloc(len);
+	if (!newname) {
+		SYSERROR("Out of memory");
+		return NULL;
+	}
+	ret = snprintf(newname, len, "%s/%s", oldname, name);
+	if (ret < 0 || ret >= len) {
+		free(newname);
+		return NULL;
+	}
+
+	newlen = strlen(mountpath) + len + 2;
+	fullnewpath = alloca(newlen);
+	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
+	if (ret < 0 || ret >= newlen) {
+		free(newname);
+		return NULL;
+	}
+
+	/* a leftover cgroup of that name may only be replaced if rmdir()
+	 * manages to remove it
+	 */
+	if (access(fullnewpath, F_OK) == 0) {
+		if (rmdir(fullnewpath) != 0) {
+			SYSERROR("container cgroup %s already exists.", fullnewpath);
+			free(newname);
+			return NULL;
+		}
+	}
+	if (rename(fulloldpath, fullnewpath)) {
+		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
+		free(newname);
+		return NULL;
+	}
+
+	DEBUG("'%s' renamed to '%s'", oldname, newname);
+
+	return newname;
+}
+
+/* create a new cgroup
+ *
+ * name:         container name; has to pass is_valid_cgroup()
+ * path_pattern: cgroup path to create, has to contain "%n" (replaced by the
+ *               container name). If it starts with '/', the cgroups are
+ *               created relative to the cgroups of pid 1, otherwise relative
+ *               to the cgroups of the calling process.
+ * meta_data:    cgroup meta data describing hierarchies and mount points
+ * sub_pattern:  optional (may be NULL) additional component that is created
+ *               below the final path; its path is recorded separately in
+ *               cgroup_path_sub
+ *
+ * The path is created component by component in every hierarchy of the base
+ * process, except for hierarchies that contain the legacy "ns" subsystem.
+ * If a component containing "%n" already exists, the name is retried with
+ * the suffixes -1, -2, ... until an unused one is found.
+ *
+ * Returns the process info list with cgroup_path / cgroup_path_sub updated
+ * to the new cgroups, or NULL on error. errno is EINVAL for an invalid name
+ * or pattern; for later failures the errno of the failing step is preserved
+ * across the cleanup.
+ */
+struct cgroup_process_info *lxc_cgroupfs_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
+{
+	char **cgroup_path_components = NULL;
+	char **p = NULL;
+	char *path_so_far = NULL;
+	char **new_cgroup_paths = NULL;
+	char **new_cgroup_paths_sub = NULL;
+	struct cgroup_mount_point *mp;
+	struct cgroup_hierarchy *h;
+	struct cgroup_process_info *base_info = NULL;
+	struct cgroup_process_info *info_ptr;
+	int saved_errno;
+	int r;
+	/* declared outside the component loop and only ever incremented, so a
+	 * suffix found for one "%n" component is also applied to later ones
+	 */
+	unsigned suffix = 0;
+	bool had_sub_pattern = false;
+	size_t i;
+
+	if (!is_valid_cgroup(name)) {
+		ERROR("Invalid cgroup name: '%s'", name);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (!strstr(path_pattern, "%n")) {
+		ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* we will modify the result of this operation directly,
+	 * so we don't have to copy the data structure
+	 */
+	base_info = (path_pattern[0] == '/') ?
+		lxc_cgroup_process_info_get_init(meta_data) :
+		lxc_cgroup_process_info_get_self(meta_data);
+	if (!base_info)
+		return NULL;
+
+	/* one slot per entry of the base_info list (indexed by list position,
+	 * see the loops below); slots of "ns" hierarchies stay NULL
+	 */
+	new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
+	if (!new_cgroup_paths)
+		goto out_initial_error;
+
+	new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
+	if (!new_cgroup_paths_sub)
+		goto out_initial_error;
+
+	/* find mount points we can use */
+	for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
+		h = info_ptr->hierarchy;
+		mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
+		if (!mp) {
+			ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
+			goto out_initial_error;
+		}
+		info_ptr->designated_mount_point = mp;
+
+		if (lxc_string_in_array("ns", (const char **)h->subsystems))
+			continue;
+		if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
+			ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
+			goto out_initial_error;
+		}
+	}
+
+	/* normalize the path */
+	cgroup_path_components = lxc_normalize_path(path_pattern);
+	if (!cgroup_path_components)
+		goto out_initial_error;
+
+	/* go through the path components to see if we can create them */
+	/* once the components are exhausted (*p == NULL), one extra iteration
+	 * is run for sub_pattern if one was given
+	 */
+	for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
+		/* we only want to create the same component with -1, -2, etc.
+		 * if the component contains the container name itself, otherwise
+		 * it's not an error if it already exists
+		 */
+		char *p_eff = *p ? *p : (char *)sub_pattern;
+		bool contains_name = strstr(p_eff, "%n");
+		char *current_component = NULL;
+		char *current_subpath = NULL;
+		char *current_entire_path = NULL;
+		char *parts[3];
+		size_t j = 0;
+		i = 0;
+
+		/* if we are processing the subpattern, we want to make sure
+		 * loop is ended the next time around
+		 */
+		if (!*p) {
+			had_sub_pattern = true;
+			/* step back so that the p++ of the loop header lands
+			 * on the terminating NULL entry again
+			 */
+			p--;
+		}
+
+		goto find_name_on_this_level;
+	
+	cleanup_name_on_this_level:
+		/* This is reached if we found a name clash.
+		 * In that case, remove the cgroup from all previous hierarchies
+		 */
+		/* NOTE(review): this pops the last created path of each of the
+		 * first i list entries without checking that the entry actually
+		 * created one for this component ("ns" entries never do, and
+		 * created_paths_count may be 0) -- verify.
+		 */
+		for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
+			r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
+			if (r < 0)
+				WARN("could not clean up cgroup we created when trying to create container");
+			free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
+			info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
+		}
+		/* current_subpath / current_component may alias each other or
+		 * p_eff (see below), so only free what was really allocated
+		 */
+		if (current_component != current_subpath)
+			free(current_subpath);
+		if (current_component != p_eff)
+			free(current_component);
+		current_component = current_subpath = NULL;
+		/* try again with another suffix */
+		++suffix;
+	
+	find_name_on_this_level:
+		/* determine name of the path component we should create */
+		/* NOTE(review): the results of lxc_string_replace() and
+		 * lxc_string_join() below are used without a NULL check.
+		 */
+		if (contains_name && suffix > 0) {
+			char *buf = calloc(strlen(name) + 32, 1);
+			if (!buf)
+				goto out_initial_error;
+			snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
+			current_component = lxc_string_replace("%n", buf, p_eff);
+			free(buf);
+		} else {
+			/* without "%n" the component is p_eff itself, not a copy */
+			current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
+		}
+		parts[0] = path_so_far;
+		parts[1] = current_component;
+		parts[2] = NULL;
+		/* for the first component the subpath is the component itself */
+		current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
+
+		/* Now go through each hierarchy and try to create the
+		 * corresponding cgroup
+		 */
+		for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
+			char *parts2[3];
+
+			if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
+				continue;
+			current_entire_path = NULL;
+
+			/* path relative to the hierarchy root: the base
+			 * process' cgroup followed by the pattern so far
+			 */
+			parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
+			parts2[1] = current_subpath;
+			parts2[2] = NULL;
+			current_entire_path = lxc_string_join("/", (const char **)parts2, false);
+
+			if (!*p) {
+				/* we are processing the subpath, so only update that one */
+				free(new_cgroup_paths_sub[i]);
+				new_cgroup_paths_sub[i] = strdup(current_entire_path);
+				if (!new_cgroup_paths_sub[i])
+					goto cleanup_from_error;
+			} else {
+				/* remember which path was used on this controller */
+				free(new_cgroup_paths[i]);
+				new_cgroup_paths[i] = strdup(current_entire_path);
+				if (!new_cgroup_paths[i])
+					goto cleanup_from_error;
+			}
+
+			r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
+			if (r < 0 && errno == EEXIST && contains_name) {
+				/* name clash => try new name with new suffix */
+				free(current_entire_path);
+				current_entire_path = NULL;
+				goto cleanup_name_on_this_level;
+			} else if (r < 0 && errno != EEXIST) {
+				SYSERROR("Could not create cgroup %s", current_entire_path);
+				goto cleanup_from_error;
+			} else if (r == 0) {
+				/* successfully created */
+				r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
+				if (r < 0)
+					goto cleanup_from_error;
+				if (!init_cpuset_if_needed(info_ptr->designated_mount_point, current_entire_path)) {
+					ERROR("Failed to initialize cpuset in new '%s'.", current_entire_path);
+					goto cleanup_from_error;
+				}
+				/* created_paths now owns current_entire_path */
+				info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
+			} else {
+				/* if we didn't create the cgroup, then we have to make sure that
+				 * further cgroups will be created properly
+				 */
+				if (handle_cgroup_settings(info_ptr->designated_mount_point, info_ptr->cgroup_path) < 0) {
+					ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
+					goto cleanup_from_error;
+				}
+				if (!init_cpuset_if_needed(info_ptr->designated_mount_point, info_ptr->cgroup_path)) {
+					ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr->cgroup_path);
+					goto cleanup_from_error;
+				}
+
+				/* already existed but path component of pattern didn't contain '%n',
+				 * so this is not an error; but then we don't need current_entire_path
+				 * anymore...
+				 */
+				free(current_entire_path);
+				current_entire_path = NULL;
+			}
+		}
+
+		/* save path so far */
+		free(path_so_far);
+		path_so_far = strdup(current_subpath);
+		if (!path_so_far)
+			goto cleanup_from_error;
+
+		/* cleanup */
+		if (current_component != current_subpath)
+			free(current_subpath);
+		if (current_component != p_eff)
+			free(current_component);
+		current_component = current_subpath = NULL;
+		continue;
+	
+	cleanup_from_error:
+		/* called if an error occurred in the loop, so we
+		 * do some additional cleanup here
+		 */
+		/* NOTE(review): current_entire_path may still point at a string
+		 * that was already handed over to created_paths when this is
+		 * reached from the strdup() failure after the hierarchy loop --
+		 * verify that it cannot be freed twice.
+		 */
+		saved_errno = errno;
+		if (current_component != current_subpath)
+			free(current_subpath);
+		if (current_component != p_eff)
+			free(current_component);
+		free(current_entire_path);
+		errno = saved_errno;
+		goto out_initial_error;
+	}
+
+	/* we're done, now update the paths */
+	for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
+		/* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
+		 * will take care of it
+		 * Since we do a continue in above loop, new_cgroup_paths[i] is
+		 * unset anyway, as is new_cgroup_paths_sub[i]
+		 */
+		if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
+			continue;
+		free(info_ptr->cgroup_path);
+		info_ptr->cgroup_path = new_cgroup_paths[i];
+		info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
+	}
+	/* don't use lxc_free_array since we used the array members
+	 * to store them in our result...
+	 */
+	free(new_cgroup_paths);
+	free(new_cgroup_paths_sub);
+	free(path_so_far);
+	lxc_free_array((void **)cgroup_path_components, free);
+	return base_info;
+
+out_initial_error:
+	/* common failure exit: release everything while keeping the errno of
+	 * the step that failed
+	 */
+	saved_errno = errno;
+	free(path_so_far);
+	lxc_cgroup_process_info_free_and_remove(base_info);
+	lxc_free_array((void **)new_cgroup_paths, free);
+	lxc_free_array((void **)new_cgroup_paths_sub, free);
+	lxc_free_array((void **)cgroup_path_components, free);
+	errno = saved_errno;
+	return NULL;
+}
+
+/*
+ * Handle the hierarchies of base_info that contain the legacy "ns"
+ * subsystem; all other hierarchies are left untouched.
+ *
+ * For any path which has ns cgroup mounted, the process 'pid' is already
+ * moved into a cgroup named after the pid. That cgroup is renamed to the
+ * container name, the entry's cgroup_path is replaced by the new name and
+ * a copy of it is appended to the entry's created_paths.
+ *
+ * Returns 0 on success, -1 as soon as one step fails.
+ */
+int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
+{
+	struct cgroup_process_info *entry;
+
+	for (entry = base_info; entry; entry = entry->next) {
+		char *renamed;
+		char *recorded;
+
+		if (!lxc_string_in_array("ns", (const char **)entry->hierarchy->subsystems))
+			continue;
+
+		renamed = cgroup_rename_nsgroup((const char *)entry->designated_mount_point->mount_point,
+				entry->cgroup_path, pid, name);
+		if (!renamed)
+			return -1;
+
+		/* the entry takes ownership of the new name */
+		free(entry->cgroup_path);
+		entry->cgroup_path = renamed;
+
+		if (lxc_grow_array((void ***)&entry->created_paths, &entry->created_paths_capacity, entry->created_paths_count + 1, 8) < 0)
+			return -1;
+
+		/* created_paths needs a string of its own */
+		recorded = strdup(renamed);
+		if (!recorded)
+			return -1;
+		entry->created_paths[entry->created_paths_count++] = recorded;
+	}
+
+	return 0;
+}
+
+/*
+ * Get the cgroup membership of a given container.
+ *
+ * For every used hierarchy in @meta_data the container's cgroup path is
+ * requested through the command interface and stored in a new list entry.
+ * Returns the head of the list, or NULL (errno preserved) if a lookup or
+ * an allocation fails.
+ */
+struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
+{
+	struct cgroup_process_info *head = NULL;
+	struct cgroup_process_info **tail = &head;
+	struct cgroup_process_info *node = NULL;
+	char *cgpath = NULL;
+	size_t idx;
+	int saved_errno = 0;
+
+	for (idx = 0; idx <= meta_data->maximum_hierarchy; idx++) {
+		struct cgroup_hierarchy *hier = meta_data->hierarchies[idx];
+
+		if (hier == NULL || !hier->used)
+			continue;
+
+		/* ask the running container (command interface) for its cgroup */
+		cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, hier->subsystems[0]);
+		if (cgpath == NULL)
+			goto fail;
+
+		node = calloc(1, sizeof(*node));
+		if (node == NULL)
+			goto fail;
+
+		node->meta_ref = lxc_cgroup_get_meta(meta_data);
+		node->hierarchy = hier;
+		/* the entry owns the path from here on */
+		node->cgroup_path = cgpath;
+		cgpath = NULL;
+
+		/* a missing mount point is not an error here; the caller
+		 * decides what to do in that case */
+		node->designated_mount_point = lxc_cgroup_find_mount_point(hier, node->cgroup_path, true);
+
+		/* append to the result list */
+		*tail = node;
+		tail = &node->next;
+		node = NULL;
+	}
+
+	return head;
+
+fail:
+	saved_errno = errno;
+	free(cgpath);
+	lxc_cgroup_process_info_free(head);
+	lxc_cgroup_process_info_free(node);
+	errno = saved_errno;
+	return NULL;
+}
+
+/*
+ * Move a process into the cgroups specified by the membership list.
+ *
+ * For each entry the pid is written to the "tasks" file of the entry's
+ * cgroup (cgroup_path_sub when @enter_sub is set and a sub path exists,
+ * cgroup_path otherwise).  Returns 0 on success, -1 on the first failure.
+ */
+int lxc_cgroupfs_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
+{
+	struct cgroup_process_info *cur;
+	char pid_str[32];
+
+	snprintf(pid_str, sizeof(pid_str), "%lu", (unsigned long)pid);
+
+	for (cur = info; cur != NULL; cur = cur->next) {
+		char *target = cur->cgroup_path;
+		char *tasks_file;
+		int rc;
+
+		if (enter_sub && cur->cgroup_path_sub)
+			target = cur->cgroup_path_sub;
+
+		/* look up (and remember) a mount point if none is known yet */
+		if (!cur->designated_mount_point)
+			cur->designated_mount_point = lxc_cgroup_find_mount_point(cur->hierarchy, target, true);
+		if (!cur->designated_mount_point) {
+			SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, target);
+			return -1;
+		}
+
+		tasks_file = cgroup_to_absolute_path(cur->designated_mount_point, target, "/tasks");
+		if (tasks_file == NULL) {
+			SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, target);
+			return -1;
+		}
+
+		rc = lxc_write_to_file(tasks_file, pid_str, strlen(pid_str), false);
+		free(tasks_file);
+		if (rc < 0) {
+			SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, target);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Free a list of process membership information.
+ *
+ * Every entry drops its meta data reference and releases its paths and
+ * its created_paths array; nothing is removed from the cgroup filesystem.
+ * A NULL list is accepted.
+ */
+void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
+{
+	while (info != NULL) {
+		struct cgroup_process_info *following = info->next;
+
+		lxc_cgroup_put_meta(info->meta_ref);
+		free(info->cgroup_path);
+		free(info->cgroup_path_sub);
+		lxc_free_array((void **)info->created_paths, free);
+		free(info);
+
+		info = following;
+	}
+}
+
+/*
+ * Free process membership information and remove cgroups that were created.
+ *
+ * For every entry in the list, the entry's cgroup_path is removed
+ * recursively below its mount point (the designated one, or one looked up
+ * on the spot), then all memory owned by the entry is released.
+ * A NULL list is accepted.
+ */
+void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
+{
+	struct cgroup_process_info *next;
+
+	for (; info; info = next) {
+		struct cgroup_mount_point *mp = info->designated_mount_point;
+
+		next = info->next;
+
+		if (!mp)
+			mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
+		if (mp)
+			/* ignore return value here, perhaps we created the
+			 * '/lxc' cgroup in this container but another container
+			 * is still running (for example)
+			 */
+			(void)remove_cgroup(mp, info->cgroup_path, true);
+
+		/* Release the recorded paths and the array holding them.  The
+		 * earlier hand-written backwards walk stepped its cursor to one
+		 * element before the start of the array, which is undefined
+		 * behaviour; lxc_free_array() needs no such cursor and is
+		 * already called with a possibly-NULL created_paths by
+		 * lxc_cgroup_process_info_free().
+		 */
+		lxc_free_array((void **)info->created_paths, free);
+		lxc_cgroup_put_meta(info->meta_ref);
+		free(info->cgroup_path);
+		free(info->cgroup_path_sub);
+		free(info);
+	}
+}
+
+/*
+ * Look up the cgroup path recorded in @handler for the hierarchy that
+ * contains @subsystem.  The returned string is the one stored in the
+ * handler's membership entry (not a copy); NULL if there is no such
+ * hierarchy.
+ */
+static char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
+{
+	struct cgfs_data *d = handler->cgroup_info->data;
+	struct cgroup_process_info *match = find_info_for_subsystem(d->info, subsystem);
+
+	return match ? match->cgroup_path : NULL;
+}
+
+/*
+ * Ask the container @name in @lxcpath, through the command interface,
+ * for its cgroup path in the hierarchy of @subsystem.
+ */
+char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
+{
+	char *cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
+
+	return cgpath;
+}
+
+/*
+ * Build the absolute filesystem path of the cgroup recorded in @handler
+ * for the hierarchy containing @subsystem.  Returns a newly allocated
+ * string, or NULL if the hierarchy or a mount point cannot be found.
+ */
+char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
+{
+	struct cgfs_data *d = handler->cgroup_info->data;
+	struct cgroup_process_info *match;
+	struct cgroup_mount_point *mount;
+
+	match = find_info_for_subsystem(d->info, subsystem);
+	if (!match)
+		return NULL;
+
+	/* prefer the designated mount point, otherwise search for one */
+	mount = match->designated_mount_point;
+	if (!mount)
+		mount = lxc_cgroup_find_mount_point(match->hierarchy, match->cgroup_path, true);
+	if (!mount)
+		return NULL;
+
+	return cgroup_to_absolute_path(mount, match->cgroup_path, NULL);
+}
+
+/*
+ * Build the absolute filesystem path of the cgroup of container @name
+ * (running in @lxcpath) in the hierarchy containing @subsystem.
+ *
+ * Meta data is loaded and the container's membership is queried just for
+ * this call; both are released before returning.  Returns a newly
+ * allocated string or NULL on any failure.
+ */
+char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
+{
+	struct cgroup_meta_data *meta = lxc_cgroup_load_meta();
+	struct cgroup_process_info *members, *match;
+	struct cgroup_mount_point *mount;
+	char *abs = NULL;
+
+	if (!meta)
+		return NULL;
+
+	members = lxc_cgroup_get_container_info(name, lxcpath, meta);
+	if (!members)
+		goto put_meta;
+
+	match = find_info_for_subsystem(members, subsystem);
+	if (match) {
+		/* prefer the designated mount point, otherwise search for one */
+		mount = match->designated_mount_point;
+		if (!mount)
+			mount = lxc_cgroup_find_mount_point(match->hierarchy, match->cgroup_path, true);
+		if (mount)
+			abs = cgroup_to_absolute_path(mount, match->cgroup_path, NULL);
+	}
+
+	lxc_cgroup_process_info_free(members);
+put_meta:
+	lxc_cgroup_put_meta(meta);
+	return abs;
+}
+
+/*
+ * Write @value to the cgroup file @filename (e.g. "freezer.state") of the
+ * container described by @handler.  Returns the result of the write, or
+ * -1 if the cgroup directory cannot be determined.
+ */
+int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
+{
+	char *controller, *dot, *dir;
+	int rc;
+
+	/* the subsystem is the part of the file name before the first '.' */
+	controller = alloca(strlen(filename) + 1);
+	strcpy(controller, filename);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
+	if (!dir)
+		return -1;
+
+	rc = do_cgroup_set(dir, filename, value);
+	free(dir);
+	return rc;
+}
+
+/*
+ * Read the cgroup file @filename (e.g. "freezer.state") of the container
+ * described by @handler into @value (at most @len bytes).  Returns the
+ * result of the read, or -1 if the cgroup directory cannot be determined.
+ */
+int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
+{
+	char *controller, *dot, *dir;
+	int rc;
+
+	/* the subsystem is the part of the file name before the first '.' */
+	controller = alloca(strlen(filename) + 1);
+	strcpy(controller, filename);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
+	if (!dir)
+		return -1;
+
+	rc = do_cgroup_get(dir, filename, value, len);
+	free(dir);
+	return rc;
+}
+
+/*
+ * Write @value to the cgroup file @filename of the container @name that
+ * runs in @lxcpath.  Returns the result of the write, or -1 if the cgroup
+ * directory cannot be determined.
+ */
+int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
+{
+	char *controller, *dot, *dir;
+	int rc;
+
+	/* the subsystem is the part of the file name before the first '.' */
+	controller = alloca(strlen(filename) + 1);
+	strcpy(controller, filename);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
+	if (!dir)
+		return -1;
+
+	rc = do_cgroup_set(dir, filename, value);
+	free(dir);
+	return rc;
+}
+
+/*
+ * Read the cgroup file @filename of the container @name that runs in
+ * @lxcpath into @value (at most @len bytes).  Returns the result of the
+ * read, or -1 if the cgroup directory cannot be determined.
+ */
+int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
+{
+	char *controller, *dot, *dir;
+	int rc;
+
+	/* the subsystem is the part of the file name before the first '.' */
+	controller = alloca(strlen(filename) + 1);
+	strcpy(controller, filename);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
+	if (!dir)
+		return -1;
+
+	rc = do_cgroup_get(dir, filename, value, len);
+	free(dir);
+	return rc;
+}
+
+/*
+ * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
+ * file for a running container.
+ *
+ * @filename  : either a cgroup file (e.g. "freezer.state") or a bare
+ *              subsystem name (e.g. "freezer"); for a bare subsystem
+ *              the directory in which the cgroup may be modified is
+ *              returned
+ * @name      : name of container to connect to
+ * @lxcpath   : the lxcpath in which the container is running
+ *
+ * This is the exported function; the cgroup path is obtained from the
+ * lxc-start of the @name container running in @lxcpath.
+ *
+ * Returns path on success, NULL on error. The caller must free()
+ * the returned path.
+ */
+char *lxc_cgroup_path_get(const char *filename, const char *name,
+                          const char *lxcpath)
+{
+	size_t fn_len = strlen(filename);
+	char *controller, *suffix = NULL, *dot, *cgpath, *result;
+
+	controller = alloca(fn_len + 1);
+	strcpy(controller, filename);
+	dot = strchr(controller, '.');
+	if (dot != NULL) {
+		/* a file was named: cut the subsystem off at the dot and
+		 * keep "/<filename>" as the suffix to append */
+		*dot = '\0';
+		suffix = alloca(fn_len + 2);
+		suffix[0] = '/';
+		strcpy(suffix + 1, filename);
+	}
+
+	cgpath = lxc_cgroup_get_hierarchy_path(controller, name, lxcpath);
+	if (cgpath == NULL)
+		return NULL;
+
+	result = lxc_cgroup_find_abs_path(controller, cgpath, true, suffix);
+	free(cgpath);
+	return result;
+}
+
+/*
+ * Populate <root>/sys/fs/cgroup for a container.
+ *
+ * A tmpfs is mounted on <root>/sys/fs/cgroup and, for every hierarchy in
+ * the container's membership list, a directory named after the
+ * comma-joined subsystems (with any "name=" prefix dropped) is created
+ * in it.  What gets mounted there depends on @type:
+ *
+ *  - LXC_AUTO_CGROUP_FULL_RO / _FULL_RW / _FULL_MIXED: the whole host
+ *    hierarchy is bind-mounted (only possible when the host mount point
+ *    has prefix "/"); it is remounted read-only for FULL_RO and
+ *    FULL_MIXED, and for FULL_MIXED the container's own cgroup is
+ *    additionally bind-mounted onto itself without MS_RDONLY.
+ *  - otherwise: only the container's own cgroup is bind-mounted at the
+ *    matching path, remounted read-only for LXC_AUTO_CGROUP_RO.
+ *
+ * For hierarchies with more than one subsystem a symlink per subsystem
+ * is created next to the directory.  Finally the tmpfs itself is
+ * remounted read-only unless @type is LXC_AUTO_CGROUP_RW or
+ * LXC_AUTO_CGROUP_FULL_RW.
+ *
+ * Returns true on success.  On failure false is returned; every failure
+ * after the type check goes through out_error, which releases all
+ * temporary buffers and preserves errno across the cleanup.
+ */
+static bool cgroupfs_mount_cgroup(const char *root,
+		struct lxc_cgroup_info *cgroup_info, int type)
+{
+	size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
+	char *path = NULL;
+	char **parts = NULL;
+	char *dirname = NULL;
+	char *abs_path = NULL;
+	char *abs_path2 = NULL;
+	struct cgfs_data *cgfs_d;
+	struct cgroup_process_info *info, *base_info;
+	int r, saved_errno = 0;
+
+	init_cg_ops();
+
+	cgfs_d = cgroup_info->data;
+	base_info = cgfs_d->info;
+
+	if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
+		ERROR("could not mount cgroups into container: invalid type specified internally");
+		errno = EINVAL;
+		return false;
+	}
+
+	path = calloc(1, bufsz);
+	if (!path)
+		return false;
+	snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
+	r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
+	if (r < 0) {
+		SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
+		/* go through the common cleanup so that 'path' is released
+		 * (a plain return here would leak it) */
+		goto out_error;
+	}
+
+	/* now mount all the hierarchies we care about */
+	for (info = base_info; info; info = info->next) {
+		size_t subsystem_count, i;
+		struct cgroup_mount_point *mp = info->designated_mount_point;
+		if (!mp)
+			mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
+		if (!mp) {
+			SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
+			goto out_error;
+		}
+
+		subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
+		parts = calloc(subsystem_count + 1, sizeof(char *));
+		if (!parts)
+			goto out_error;
+
+		/* parts[] only borrows the subsystem strings (minus a
+		 * leading "name="), so only the array itself is freed */
+		for (i = 0; i < subsystem_count; i++) {
+			if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
+				parts[i] = info->hierarchy->subsystems[i] + 5;
+			else
+				parts[i] = info->hierarchy->subsystems[i];
+		}
+		dirname = lxc_string_join(",", (const char **)parts, false);
+		if (!dirname)
+			goto out_error;
+
+		/* create subsystem directory */
+		abs_path = lxc_append_paths(path, dirname);
+		if (!abs_path)
+			goto out_error;
+		r = mkdir_p(abs_path, 0755);
+		if (r < 0 && errno != EEXIST) {
+			SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
+			goto out_error;
+		}
+
+		/* abs_path2: the container's own cgroup below that directory */
+		abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
+		if (!abs_path2)
+			goto out_error;
+
+		if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
+			/* bind-mount the cgroup entire filesystem there */
+			if (strcmp(mp->mount_prefix, "/") != 0) {
+				/* FIXME: maybe we should just try to remount the entire hierarchy
+				 *        with a regular mount command? may that works? */
+				ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
+				goto out_error;
+			}
+			r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
+			if (r < 0) {
+				SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
+				goto out_error;
+			}
+			/* main cgroup path should be read-only */
+			if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
+				r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
+				if (r < 0) {
+					SYSERROR("error re-mounting %s readonly", abs_path);
+					goto out_error;
+				}
+			}
+			/* own cgroup should be read-write */
+			if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
+				r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
+				if (r < 0) {
+					SYSERROR("error bind-mounting %s onto itself", abs_path2);
+					goto out_error;
+				}
+				r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
+				if (r < 0) {
+					SYSERROR("error re-mounting %s readwrite", abs_path2);
+					goto out_error;
+				}
+			}
+		} else {
+			/* create path for container's cgroup */
+			r = mkdir_p(abs_path2, 0755);
+			if (r < 0 && errno != EEXIST) {
+				SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
+				goto out_error;
+			}
+
+			free(abs_path);
+			abs_path = NULL;
+
+			/* bind-mount container's cgroup to that directory */
+			abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
+			if (!abs_path)
+				goto out_error;
+			r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
+			if (r < 0) {
+				SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
+				goto out_error;
+			}
+			if (type == LXC_AUTO_CGROUP_RO) {
+				r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
+				if (r < 0) {
+					SYSERROR("error re-mounting %s readonly", abs_path2);
+					goto out_error;
+				}
+			}
+		}
+
+		free(abs_path);
+		free(abs_path2);
+		abs_path = NULL;
+		abs_path2 = NULL;
+
+		/* add symlinks for every single subsystem */
+		if (subsystem_count > 1) {
+			for (i = 0; i < subsystem_count; i++) {
+				abs_path = lxc_append_paths(path, parts[i]);
+				if (!abs_path)
+					goto out_error;
+				r = symlink(dirname, abs_path);
+				/* a failed symlink is only worth a warning */
+				if (r < 0)
+					WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
+				free(abs_path);
+				abs_path = NULL;
+			}
+		}
+		free(dirname);
+		free(parts);
+		dirname = NULL;
+		parts = NULL;
+	}
+
+	/* try to remount the tmpfs readonly, since the container shouldn't
+	 * change anything (this will also make sure that trying to create
+	 * new cgroups outside the allowed area fails with an error instead
+	 * of simply causing this to create directories in the tmpfs itself)
+	 */
+	if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
+		mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
+
+	free(path);
+
+	return true;
+
+out_error:
+	saved_errno = errno;
+	free(path);
+	free(dirname);
+	free(parts);
+	free(abs_path);
+	free(abs_path2);
+	errno = saved_errno;
+	return false;
+}
+
+/*
+ * Count the tasks in the container's cgroup, including nested cgroups,
+ * using the first entry of the membership list stored in @handler.
+ * Returns the count, or -1 on error (errno is ENOENT when the handler
+ * has no membership information).
+ */
+int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
+{
+	struct cgfs_data *d = handler->cgroup_info->data;
+	struct cgroup_process_info *first = d->info;
+	struct cgroup_mount_point *mount;
+	char *dir;
+	int count;
+
+	if (first == NULL) {
+		errno = ENOENT;
+		return -1;
+	}
+
+	mount = first->designated_mount_point;
+	if (mount == NULL) {
+		mount = lxc_cgroup_find_mount_point(first->hierarchy, first->cgroup_path, false);
+		if (mount == NULL)
+			return -1;
+	}
+
+	dir = cgroup_to_absolute_path(mount, first->cgroup_path, NULL);
+	if (dir == NULL)
+		return -1;
+
+	count = cgroup_recursive_task_count(dir);
+	free(dir);
+	return count;
+}
+
+/*
+ * Parse a /proc/<pid>/cgroup style file into a membership list.
+ *
+ * Each line has the form "hierarchy:subsystems:group".  Lines that do
+ * not parse are skipped, as are hierarchies not marked as used in @meta.
+ * A hierarchy number that @meta does not know about fails the whole call
+ * with errno set to EAGAIN.  Returns the list head, or NULL on error
+ * (also when the file cannot be opened).
+ */
+static struct cgroup_process_info *
+lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str,
+			     struct cgroup_meta_data *meta)
+{
+	struct cgroup_process_info *head = NULL;
+	struct cgroup_process_info **tail = &head;
+	struct cgroup_process_info *node = NULL;
+	char *line = NULL;
+	size_t line_cap = 0;
+	int saved_errno = 0;
+	FILE *fp;
+
+	fp = fopen_cloexec(proc_pid_cgroup_str, "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (getline(&line, &line_cap, fp) != -1) {
+		struct cgroup_hierarchy *hier = NULL;
+		char *subsystems, *group;
+		char *num_end = NULL;
+		int number;
+		size_t len = strlen(line);
+
+		if (len == 0)
+			continue;
+
+		/* drop the trailing newline */
+		if (line[len - 1] == '\n')
+			line[len - 1] = '\0';
+
+		/* split "hierarchy:subsystems:group" in place */
+		subsystems = strchr(line, ':');
+		if (subsystems == NULL)
+			continue;
+		*subsystems++ = '\0';
+		group = strchr(subsystems, ':');
+		if (group == NULL)
+			continue;
+		*group++ = '\0';
+
+		number = strtoul(line, &num_end, 10);
+		if (num_end == NULL || *num_end != '\0')
+			continue;
+
+		/* A number beyond what the meta data covers, or a slot that
+		 * was thought to be dead, means somebody probably remounted
+		 * some stuff in the mean time.
+		 */
+		if (number <= meta->maximum_hierarchy)
+			hier = meta->hierarchies[number];
+		if (hier == NULL) {
+			errno = EAGAIN;
+			goto fail;
+		}
+
+		/* we are told that we should ignore this hierarchy */
+		if (!hier->used)
+			continue;
+
+		node = calloc(1, sizeof(*node));
+		if (node == NULL)
+			goto fail;
+
+		node->meta_ref = lxc_cgroup_get_meta(meta);
+		node->hierarchy = hier;
+		node->cgroup_path = strdup(group);
+		if (node->cgroup_path == NULL)
+			goto fail;
+
+		/* append to the result list */
+		*tail = node;
+		tail = &node->next;
+		node = NULL;
+	}
+
+	fclose(fp);
+	free(line);
+	return head;
+
+fail:
+	saved_errno = errno;
+	fclose(fp);
+	lxc_cgroup_process_info_free(head);
+	lxc_cgroup_process_info_free(node);
+	free(line);
+	errno = saved_errno;
+	return NULL;
+}
+
+/*
+ * Extract the cgroup subsystems from a comma separated mount option
+ * string.  An option counts as a subsystem when it starts with "name="
+ * (named hierarchy) or appears in @kernel_list.
+ *
+ * Returns a NULL-terminated array of newly allocated strings; NULL when
+ * no option matched or on allocation failure (errno preserved in the
+ * failure case).
+ */
+static char **subsystems_from_mount_options(const char *mount_options,
+					    char **kernel_list)
+{
+	char **found = NULL;
+	size_t capacity = 0;
+	size_t count = 0;
+	char *copy, *tok, *state = NULL;
+	int saved_errno;
+
+	/* strtok_r() modifies its input, so tokenize a private copy */
+	copy = alloca(strlen(mount_options)+1);
+	strcpy(copy, mount_options);
+
+	tok = strtok_r(copy, ",", &state);
+	while (tok != NULL) {
+		bool named = strncmp(tok, "name=", 5) == 0;
+
+		if (named || lxc_string_in_array(tok, (const char **)kernel_list)) {
+			if (lxc_grow_array((void ***)&found, &capacity, count + 1, 12) < 0)
+				goto fail;
+			/* terminate behind the new slot before filling it */
+			found[count + 1] = NULL;
+			found[count] = strdup(tok);
+			if (found[count] == NULL)
+				goto fail;
+			count++;
+		}
+		tok = strtok_r(NULL, ",", &state);
+	}
+
+	return found;
+
+fail:
+	saved_errno = errno;
+	lxc_free_array((void**)found, free);
+	errno = saved_errno;
+	return NULL;
+}
+
+/* Release a mount point record together with its two strings; NULL is a no-op. */
+static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
+{
+	if (mp != NULL) {
+		free(mp->mount_point);
+		free(mp->mount_prefix);
+		free(mp);
+	}
+}
+
+/*
+ * Release a hierarchy record: its subsystem strings, the all_mount_points
+ * array (the array only, not what its elements point to) and the record
+ * itself.  NULL is a no-op.
+ */
+static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
+{
+	if (h != NULL) {
+		lxc_free_array((void **)h->subsystems, free);
+		free(h->all_mount_points);
+		free(h);
+	}
+}
+
+/*
+ * Check whether @name is acceptable as a single cgroup path component.
+ *
+ * Rejected are "." and "..", and any name containing '/', or a character
+ * outside the printable ASCII range 33..126 (space is excluded because
+ * it would break legacy lxc-ls).
+ */
+static bool is_valid_cgroup(const char *name)
+{
+	size_t i;
+
+	if (!strcmp(name, ".") || !strcmp(name, ".."))
+		return false;
+
+	for (i = 0; name[i] != '\0'; i++) {
+		char c = name[i];
+		bool printable = c > 32 && c < 127;
+
+		if (!printable || c == '/')
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Create (mkdir, mode 0777) or remove the cgroup @path below mount point
+ * @mp.  When removing, @recurse selects cgroup_rmdir() over a plain
+ * rmdir().  Returns the result of that call, or -1 if the absolute path
+ * cannot be built; errno of the operation is preserved.
+ */
+static int create_or_remove_cgroup(bool do_remove,
+		struct cgroup_mount_point *mp, const char *path, int recurse)
+{
+	char *dir = cgroup_to_absolute_path(mp, path, NULL);
+	int rc, saved_errno = 0;
+
+	if (dir == NULL)
+		return -1;
+
+	if (!do_remove)
+		rc = mkdir(dir, 0777);
+	else if (recurse)
+		rc = cgroup_rmdir(dir);
+	else
+		rc = rmdir(dir);
+
+	/* free() must not clobber the errno of the operation */
+	saved_errno = errno;
+	free(dir);
+	errno = saved_errno;
+	return rc;
+}
+
+/* Create the cgroup @path below mount point @mp. */
+static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
+{
+	int rc = create_or_remove_cgroup(false, mp, path, false);
+
+	return rc;
+}
+
+/* Remove the cgroup @path below mount point @mp, recursively if @recurse is set. */
+static int remove_cgroup(struct cgroup_mount_point *mp,
+			 const char *path, bool recurse)
+{
+	int rc = create_or_remove_cgroup(true, mp, path, recurse);
+
+	return rc;
+}
+
+/*
+ * Translate the cgroup @path into a filesystem path below @mp.
+ *
+ * @path must be absolute.  If the mount point has a prefix other than
+ * "/", @path must lie at or below that prefix, and the prefix is cut off
+ * before @path is appended to the mount point; @suffix (may be NULL) is
+ * appended last.
+ *
+ * Returns a newly allocated string, or NULL with errno set to EINVAL
+ * (bad path or prefix mismatch) or ENOMEM (formatted result too long);
+ * NULL is also returned when the allocation fails.
+ */
+static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp,
+				     const char *path, const char *suffix)
+{
+	const char *tail = suffix ? suffix : "";
+	char *prefix = mp->mount_prefix;
+	size_t prefix_len = 0;
+	char *result;
+	ssize_t len, written;
+
+	/* only absolute cgroup paths are accepted */
+	if (path[0] != '/')
+		goto invalid;
+
+	/* a prefix of "/" (or none) means there is nothing to cut off */
+	if (prefix && strcmp(prefix, "/") != 0) {
+		prefix_len = strlen(prefix);
+
+		/* prefix doesn't match */
+		if (strncmp(prefix, path, prefix_len) != 0)
+			goto invalid;
+		/* prefix /foo must not match path /foobar */
+		if (path[prefix_len] != '/' && path[prefix_len] != '\0')
+			goto invalid;
+	}
+	path += prefix_len;
+
+	len = strlen(mp->mount_point) + strlen(path) + strlen(tail);
+	result = calloc(len + 1, 1);
+	if (result == NULL)
+		return NULL;
+
+	written = snprintf(result, len + 1, "%s%s%s", mp->mount_point, path, tail);
+	if (written > len) {
+		free(result);
+		errno = ENOMEM;
+		return NULL;
+	}
+	return result;
+
+invalid:
+	errno = EINVAL;
+	return NULL;
+}
+
+/*
+ * Return the first entry of the membership list @info whose hierarchy
+ * contains @subsystem; NULL with errno set to ENOENT if there is none.
+ */
+static struct cgroup_process_info *
+find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
+{
+	struct cgroup_process_info *cur = info;
+
+	while (cur != NULL) {
+		if (lxc_string_in_array(subsystem, (const char **)cur->hierarchy->subsystems))
+			return cur;
+		cur = cur->next;
+	}
+
+	errno = ENOENT;
+	return NULL;
+}
+
+/*
+ * Read the file @sub_filename inside the directory @cgroup_path into
+ * @value (at most @len bytes).  Returns what lxc_read_from_file()
+ * returns, or -1 if the file name cannot be built; errno of the read is
+ * preserved.
+ */
+static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
+			 char *value, size_t len)
+{
+	const char *components[3] = { cgroup_path, sub_filename, NULL };
+	char *file = lxc_string_join("/", components, false);
+	int nread, saved_errno;
+
+	if (file == NULL)
+		return -1;
+
+	nread = lxc_read_from_file(file, value, len);
+	saved_errno = errno;
+	free(file);
+	errno = saved_errno;
+	return nread;
+}
+
+/*
+ * Write the string @value to the file @sub_filename inside the directory
+ * @cgroup_path.  Returns what lxc_write_to_file() returns, or -1 if the
+ * file name cannot be built; errno of the write is preserved.
+ */
+static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
+			 const char *value)
+{
+	const char *components[3] = { cgroup_path, sub_filename, NULL };
+	char *file = lxc_string_join("/", components, false);
+	int rc, saved_errno;
+
+	if (file == NULL)
+		return -1;
+
+	rc = lxc_write_to_file(file, value, strlen(value), false);
+	saved_errno = errno;
+	free(file);
+	errno = saved_errno;
+	return rc;
+}
+
+/*
+ * Apply the configured cgroup settings to the container of @h.
+ *
+ * With @do_devices set only the settings whose name starts with
+ * "devices" are applied, otherwise only all the others.  A
+ * "devices.deny" or "devices.allow" entry is skipped when
+ * cgroup_devices_has_allow_or_deny() reports it as already in effect.
+ *
+ * Returns 0 on success (also for an empty list), -1 as soon as one
+ * setting cannot be written.
+ */
+static int do_setup_cgroup_limits(struct lxc_handler *h,
+			   struct lxc_list *cgroup_settings, bool do_devices)
+{
+	struct lxc_list *it;
+
+	if (lxc_list_empty(cgroup_settings))
+		return 0;
+
+	lxc_list_for_each(it, cgroup_settings) {
+		struct lxc_cgroup *setting = it->elem;
+		bool is_devices = !strncmp("devices", setting->subsystem, 7);
+
+		if (is_devices == do_devices) {
+			if (strcmp(setting->subsystem, "devices.deny") == 0 &&
+			    cgroup_devices_has_allow_or_deny(h, setting->value, false))
+				continue;
+			if (strcmp(setting->subsystem, "devices.allow") == 0 &&
+			    cgroup_devices_has_allow_or_deny(h, setting->value, true))
+				continue;
+			if (lxc_cgroup_set_handler(setting->subsystem, setting->value, h)) {
+				ERROR("Error setting %s to %s for %s\n",
+				      setting->subsystem, setting->value, h->name);
+				return -1;
+			}
+		}
+
+		/* NOTE: also reached for entries filtered out by do_devices */
+		DEBUG("cgroup '%s' set to '%s'", setting->subsystem, setting->value);
+	}
+
+	INFO("cgroup has been setup");
+	return 0;
+}
+
+/*
+ * Check the container's devices.list to see whether a devices.allow or
+ * devices.deny setting @v is already in effect.
+ *
+ * @for_allow == true : returns true if devices.list contains the line
+ *                      "a *:* rwm" or a line equal to @v.
+ * @for_allow == false: only the deny-all forms "a" and "a *:* rwm" are
+ *                      examined (anything else yields false); returns
+ *                      true when devices.list has no "a *:* rwm" line.
+ *
+ * Returns false when the devices cgroup directory cannot be determined
+ * or devices.list cannot be opened.
+ */
+static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h,
+					     char *v, bool for_allow)
+{
+	char *dir;
+	char *path;
+	FILE *devices_list;
+	char *line = NULL;
+	size_t sz = 0;
+	bool ret = !for_allow;
+	const char *parts[3] = {
+		NULL,
+		"devices.list",
+		NULL
+	};
+
+	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
+	// not sure they ever do, but they *could*
+	// right now, I'm assuming they do NOT
+	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
+		return false;
+
+	dir = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
+	if (!dir)
+		return false;
+	parts[0] = dir;
+	path = lxc_string_join("/", parts, false);
+	/* The joined path is a separate allocation, so the directory
+	 * string is no longer needed.  Freeing it here covers the success
+	 * path too; it used to be released only when the join failed.
+	 */
+	free(dir);
+	if (!path)
+		return false;
+
+	devices_list = fopen_cloexec(path, "r");
+	if (!devices_list) {
+		free(path);
+		return false;
+	}
+
+	while (getline(&line, &sz, devices_list) != -1) {
+		size_t len = strlen(line);
+		if (len > 0 && line[len-1] == '\n')
+			line[len-1] = '\0';
+		if (strcmp(line, "a *:* rwm") == 0) {
+			/* everything is allowed: an allow is redundant, a
+			 * deny-all is not yet in effect */
+			ret = for_allow;
+			break;
+		}
+		if (for_allow && strcmp(line, v) == 0) {
+			ret = true;
+			break;
+		}
+	}
+
+	fclose(devices_list);
+	free(line);
+	free(path);
+	return ret;
+}
+
+/*
+ * Count the lines of every "tasks" file in @cgroup_path and, recursively,
+ * in its sub-directories.
+ *
+ * Returns the total; 0 if the directory cannot be opened; -1 if the
+ * dirent buffer or a sub path cannot be allocated or if stat() on an
+ * entry fails.  Negative results from a sub-directory or from a single
+ * tasks file are ignored.
+ */
+static int cgroup_recursive_task_count(const char *cgroup_path)
+{
+	DIR *dir;
+	struct dirent *entry_buf;
+	struct dirent *entry;
+	ssize_t name_max;
+	int total = 0;
+
+	/* size the buffer as described in man readdir_r(3) */
+	name_max = pathconf(cgroup_path, _PC_NAME_MAX);
+	if (name_max <= 0)
+		name_max = 255;
+	entry_buf = malloc(offsetof(struct dirent, d_name) + name_max + 1);
+	if (entry_buf == NULL)
+		return -1;
+
+	dir = opendir(cgroup_path);
+	if (dir == NULL) {
+		free(entry_buf);
+		return 0;
+	}
+
+	while (readdir_r(dir, entry_buf, &entry) == 0 && entry) {
+		const char *parts[3] = {
+			cgroup_path,
+			entry->d_name,
+			NULL
+		};
+		char *sub_path;
+		struct stat st;
+		int r;
+
+		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+			continue;
+
+		sub_path = lxc_string_join("/", parts, false);
+		if (sub_path == NULL) {
+			total = -1;
+			break;
+		}
+		if (stat(sub_path, &st) < 0) {
+			free(sub_path);
+			total = -1;
+			break;
+		}
+
+		r = -1;
+		if (S_ISDIR(st.st_mode))
+			r = cgroup_recursive_task_count(sub_path);
+		else if (!strcmp(entry->d_name, "tasks"))
+			r = count_lines(sub_path);
+		if (r >= 0)
+			total += r;
+
+		free(sub_path);
+	}
+
+	closedir(dir);
+	free(entry_buf);
+	return total;
+}
+
+/* Count the lines of file @fn; returns -1 if the file cannot be opened. */
+static int count_lines(const char *fn)
+{
+	FILE *fp = fopen_cloexec(fn, "r");
+	char *buf = NULL;
+	size_t cap = 0;
+	int lines;
+
+	if (fp == NULL)
+		return -1;
+
+	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
+		;
+
+	free(buf);
+	fclose(fp);
+	return lines;
+}
+
+/*
+ * Prepare the cgroup @cgroup_path on mount point @mp before child
+ * cgroups are created in it.
+ *
+ *  - memory hierarchies: memory.use_hierarchy is set to 1 unless it
+ *    already reads 1; failing to do so is only logged.
+ *  - cpuset hierarchies: cgroup.clone_children is set to 1 unless it
+ *    already reads 1.  If that file does not exist (older kernels),
+ *    mp->need_cpuset_init is set instead so that cpuset.cpus and
+ *    cpuset.mems can be initialized after the new cgroup was created.
+ *
+ * Returns 0 on success, -1 if the clone_children path cannot be built or
+ * written.
+ */
+static int handle_cgroup_settings(struct cgroup_mount_point *mp,
+				  char *cgroup_path)
+{
+	const char **subsystems = (const char **)mp->hierarchy->subsystems;
+	char flag[2];
+	char *file;
+	struct stat sb;
+	int rc, saved_errno = 0;
+
+	mp->need_cpuset_init = false;
+
+	/* If this is the memory cgroup, we want to enforce hierarchy.
+	 * But don't fail if for some reason we can't.
+	 */
+	if (lxc_string_in_array("memory", subsystems)) {
+		file = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
+		if (file) {
+			rc = lxc_read_from_file(file, flag, 1);
+			if (rc < 1 || flag[0] != '1') {
+				if (lxc_write_to_file(file, "1", 1, false) < 0)
+					SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
+			}
+			free(file);
+		}
+	}
+
+	/* everything below only concerns cpuset hierarchies */
+	if (!lxc_string_in_array("cpuset", subsystems))
+		return 0;
+
+	/* cgroup.clone_children has to be set in the base cgroup, otherwise
+	 * containers will start with an empty cpuset.mems and cpuset.cpus
+	 */
+	file = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
+	if (!file)
+		return -1;
+
+	/* not available when running under older kernel versions; in this
+	 * case cpuset.cpus and cpuset.mems are initialized later
+	 */
+	if (stat(file, &sb) != 0 && errno == ENOENT) {
+		mp->need_cpuset_init = true;
+		free(file);
+		return 0;
+	}
+
+	rc = lxc_read_from_file(file, flag, 1);
+	if (rc != 1 || flag[0] != '1') {
+		rc = lxc_write_to_file(file, "1", 1, false);
+		saved_errno = errno;
+		free(file);
+		errno = saved_errno;
+		return rc < 0 ? -1 : 0;
+	}
+
+	/* already enabled */
+	free(file);
+	return 0;
+}
+
+/*
+ * Read @fn into @buf and NUL-terminate the result.
+ *
+ * Returns the value reported by lxc_read_from_file().  When that equals
+ * @bufsize the last byte of the buffer is overwritten by the terminator,
+ * so callers can detect a possibly truncated value by comparing the
+ * result with @bufsize.  Returns a negative value on read errors and -1
+ * for a zero-sized buffer.
+ */
+static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
+{
+	int nread = lxc_read_from_file(fn, buf, bufsize);
+
+	if (nread < 0) {
+		SYSERROR("failed to read %s", fn);
+		return nread;
+	}
+
+	/* the common case: there is room left for the terminator */
+	if ((size_t)nread != bufsize) {
+		buf[nread] = '\0';
+		return nread;
+	}
+
+	if (bufsize == 0) {
+		/* Callers don't do this, but regression/sanity check */
+		ERROR("%s: was not expecting 0 bufsize", __func__);
+		return -1;
+	}
+
+	/* buffer completely filled (so obviously not empty) */
+	buf[bufsize-1] = '\0';
+	return nread;
+}
+
+/*
+ * Initialize one cpuset file (@name, e.g. "/cpuset.cpus") of the cgroup
+ * @path from the file of the same name in the parent cgroup.
+ *
+ * A value that is already non-empty is left alone.  Returns true if the
+ * file already had a value or the parent's value was copied, false on
+ * any failure.
+ */
+static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
+				const char *path, const char *name)
+{
+	char value[1024];
+	char *childfile, *parent_cgroup = NULL, *parentfile = NULL, *slash;
+	int nread;
+	bool ok = false;
+
+	childfile = cgroup_to_absolute_path(mp, path, name);
+	if (!childfile)
+		return false;
+
+	/* don't overwrite a non-empty value in the file */
+	nread = cgroup_read_from_file(childfile, value, sizeof(value));
+	if (nread < 0)
+		goto done;
+	if (value[0] != '\0' && value[0] != '\n') {
+		ok = true;
+		goto done;
+	}
+
+	/* derive the parent cgroup by cutting off the last path component */
+	parent_cgroup = strdup(path);
+	if (!parent_cgroup)
+		goto done;
+	slash = strrchr(parent_cgroup, '/');
+	if (!slash)
+		goto done;
+	if (slash == parent_cgroup)
+		slash++; /* keep the '/' at the start */
+	*slash = '\0';
+
+	/* path to the same name in the parent cgroup */
+	parentfile = cgroup_to_absolute_path(mp, parent_cgroup, name);
+	if (!parentfile)
+		goto done;
+
+	/* copy from parent to child cgroup */
+	nread = cgroup_read_from_file(parentfile, value, sizeof(value));
+	if (nread < 0)
+		goto done;
+	if (nread == sizeof(value)) {
+		/* If anyone actually sees this error, we can address it */
+		ERROR("parent cpuset value too long");
+		goto done;
+	}
+	if (lxc_write_to_file(childfile, value, strlen(value), false) >= 0)
+		ok = true;
+	else
+		SYSERROR("failed writing %s", childfile);
+
+done:
+	free(parentfile);
+	free(parent_cgroup);
+	free(childfile);
+	return ok;
+}
+
+/*
+ * Initialize cpuset.cpus and cpuset.mems of the cgroup @path from its
+ * parent, but only for cpuset hierarchies whose mount point has
+ * need_cpuset_init set.  Returns true when nothing had to be done or
+ * both files were initialized.
+ */
+static bool init_cpuset_if_needed(struct cgroup_mount_point *mp,
+				  const char *path)
+{
+	/* the files we have to handle here are only in cpuset hierarchies */
+	if (!lxc_string_in_array("cpuset",
+				 (const char **)mp->hierarchy->subsystems) ||
+	    !mp->need_cpuset_init)
+		return true;
+
+	if (!do_init_cpuset_file(mp, path, "/cpuset.cpus"))
+		return false;
+	return do_init_cpuset_file(mp, path, "/cpuset.mems");
+}
+
+extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
+			    const char *lxcpath);
+/*
+ * Freeze (@freeze != 0) or thaw (@freeze == 0) the container @name in
+ * @lxcpath through its freezer cgroup.
+ *
+ * "FROZEN" or "THAWED" is written to freezer.state, then the file is
+ * polled once per second until it reports the requested state.  When
+ * that happens, and @name is not NULL, the new state is sent to the
+ * monitor.  There is no timeout.
+ *
+ * Returns 0 once the state is reached, -1 if freezer.state cannot be
+ * written or read.
+ */
+int do_unfreeze(int freeze, const char *name, const char *lxcpath)
+{
+	char v[100];
+	const char *state = freeze ? "FROZEN" : "THAWED";
+
+	if (lxc_cgroup_set("freezer.state", state, name, lxcpath) < 0) {
+		/* name the operation that actually failed */
+		ERROR("Failed to %s %s:%s", freeze ? "freeze" : "unfreeze", lxcpath, name);
+		return -1;
+	}
+	while (1) {
+		size_t len;
+
+		if (lxc_cgroup_get("freezer.state", v, 100, name, lxcpath) < 0) {
+			ERROR("Failed to get new freezer state for %s:%s", lxcpath, name);
+			return -1;
+		}
+		/* strip a trailing newline; the length check keeps an empty
+		 * value from indexing v[-1] */
+		len = strlen(v);
+		if (len > 0 && v[len - 1] == '\n')
+			v[len - 1] = '\0';
+		if (strncmp(v, state, strlen(state)) == 0) {
+			if (name)
+				lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
+			return 0;
+		}
+		sleep(1);
+	}
+}
+
+/* Freeze (@freeze != 0) or thaw the container @name in @lxcpath; see do_unfreeze(). */
+int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
+{
+	int rc = do_unfreeze(freeze, name, lxcpath);
+
+	return rc;
+}
+
+/*
+ * Read freezer.state of the container 'name' under 'lxcpath' and translate
+ * it with lxc_str2state().
+ *
+ * Returns the translated state, or -1 when the value could not be read.
+ */
+lxc_state_t freezer_state(const char *name, const char *lxcpath)
+{
+	char v[100];
+	size_t len;
+
+	/*
+	 * Zero the buffer and keep the last byte in reserve, so that v is a
+	 * terminated string even if the backend fills the buffer without
+	 * appending a NUL.
+	 */
+	memset(v, 0, sizeof(v));
+	if (lxc_cgroup_get("freezer.state", v, sizeof(v) - 1, name, lxcpath) < 0)
+		return -1;
+
+	/* strip one trailing newline; an empty value must not index v[-1] */
+	len = strlen(v);
+	if (len > 0 && v[len - 1] == '\n')
+		v[len - 1] = '\0';
+	return lxc_str2state(v);
+}
+
+/*
+ * Drop the cgroupfs backend data hanging off the handler: the process info
+ * is freed and removed, the metadata reference is put, and the data pointer
+ * in the handler is cleared.  Does nothing when no backend data is attached.
+ */
+static void cgfs_destroy(struct lxc_handler *handler)
+{
+	struct cgfs_data *backend = handler->cgroup_info->data;
+
+	if (backend) {
+		if (backend->info)
+			lxc_cgroup_process_info_free_and_remove(backend->info);
+		if (backend->meta)
+			lxc_cgroup_put_meta(backend->meta);
+		free(backend);
+		handler->cgroup_info->data = NULL;
+	}
+}
+
+/*
+ * Allocate the cgroupfs backend data, load the cgroup metadata into it and
+ * attach it to the handler.
+ *
+ * Returns false, leaving the handler's data pointer untouched, when either
+ * the allocation or the metadata detection fails.
+ */
+static inline bool cgfs_init(struct lxc_handler *handler)
+{
+	struct cgfs_data *backend;
+
+	backend = malloc(sizeof(*backend));
+	if (!backend)
+		return false;
+
+	backend->info = NULL;
+	backend->meta = lxc_cgroup_load_meta();
+	if (backend->meta) {
+		handler->cgroup_info->data = backend;
+		return true;
+	}
+
+	ERROR("cgroupfs failed to detect cgroup metadata");
+	free(backend);
+	return false;
+}
+
+/*
+ * Create the container's cgroups from the handler's name and cgroup pattern
+ * (no sub pattern) and remember the resulting process info in the backend
+ * data.  Returns false, leaving the stored info unchanged, on failure.
+ */
+static inline bool cgfs_create(struct lxc_handler *handler)
+{
+	struct cgfs_data *backend = handler->cgroup_info->data;
+	struct cgroup_process_info *created;
+
+	created = lxc_cgroupfs_create(handler->name, handler->cgroup_info->cgroup_pattern, backend->meta, NULL);
+	if (created)
+		backend->info = created;
+
+	return created != NULL;
+}
+
+/*
+ * Move the process handler->pid into the cgroups recorded in the backend
+ * data.  Returns true only when lxc_cgroupfs_enter() reports 0.
+ */
+static inline bool cgfs_enter(struct lxc_handler *handler)
+{
+	struct cgfs_data *backend = handler->cgroup_info->data;
+
+	return lxc_cgroupfs_enter(backend->info, handler->pid, false) == 0;
+}
+
+/*
+ * Run the legacy ns-cgroup setup for the handler's container name and pid
+ * on the process info stored in the backend data.  Logs an error and returns
+ * false when lxc_cgroup_create_legacy() reports a negative value.
+ */
+static inline bool cgfs_create_legacy(struct lxc_handler *handler)
+{
+	struct cgfs_data *backend = handler->cgroup_info->data;
+	int ret;
+
+	ret = lxc_cgroup_create_legacy(backend->info, handler->name, handler->pid);
+	if (ret >= 0)
+		return true;
+
+	ERROR("failed to create legacy ns cgroups for '%s'", handler->name);
+	return false;
+}
+
+/*
+ * Look up the cgroup path the handler's container uses for 'subsystem'.
+ * The result of lxc_cgroup_get_hierarchy_path_handler() is passed through
+ * unchanged.
+ */
+static char *cgfs_get_cgroup(struct lxc_handler *handler, const char *subsystem)
+{
+	char *path;
+
+	path = lxc_cgroup_get_hierarchy_path_handler(subsystem, handler);
+	return path;
+}
+
+/*
+ * Thaw the container described by the handler by writing "THAWED" to
+ * freezer.state of its freezer cgroup.
+ *
+ * Returns true when the write succeeded, false when the freezer cgroup path
+ * could not be determined or the write failed.
+ */
+static bool cgfs_unfreeze_fromhandler(struct lxc_handler *handler)
+{
+	char *cgabspath, *cgrelpath;
+	int ret;
+
+	cgrelpath = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
+	/*
+	 * Without a relative path there is nothing to resolve; bail out
+	 * instead of handing NULL on as the group name.
+	 */
+	if (!cgrelpath)
+		return false;
+
+	cgabspath = lxc_cgroup_find_abs_path("freezer", cgrelpath, true, NULL);
+	if (!cgabspath)
+		return false;
+
+	ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
+	free(cgabspath);
+	return ret == 0;
+}
+
+/*
+ * Apply the cgroup settings listed in h->conf->cgroup through
+ * do_setup_cgroup_limits(); 'with_devices' is forwarded as its do_devices
+ * argument.  Returns true only when that call reports 0.
+ */
+bool cgroupfs_setup_limits(struct lxc_handler *h, bool with_devices)
+{
+	int ret;
+
+	ret = do_setup_cgroup_limits(h, &h->conf->cgroup, with_devices);
+	return ret == 0;
+}
+
+/*
+ * Move the process 'pid' into the cgroups of the container 'name' under
+ * 'lxcpath'.
+ *
+ * The cgroup metadata is loaded only for the lookup of the container's
+ * cgroup info and is put again right after it.  Returns true when the
+ * process was moved; any failure along the way is logged and yields false.
+ */
+bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
+{
+	struct cgroup_meta_data *meta_data;
+	struct cgroup_process_info *container_info = NULL;
+	int ret = -1;
+
+	meta_data = lxc_cgroup_load_meta();
+	if (meta_data) {
+		container_info = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
+		lxc_cgroup_put_meta(meta_data);
+	}
+
+	if (container_info) {
+		ret = lxc_cgroupfs_enter(container_info, pid, false);
+		lxc_cgroup_process_info_free(container_info);
+	}
+
+	/* every failure above ends up here with ret still negative */
+	if (ret < 0) {
+		ERROR("could not move attached process %d to cgroup of container", pid);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Operation table of the cgroupfs backend.  The chown hook is left NULL;
+ * cgroup_chown() treats a missing hook as success.
+ */
+static struct cgroup_ops cgfs_ops = {
+	.name = "cgroupfs",
+
+	/* hooks taking the container's handler */
+	.init = cgfs_init,
+	.create = cgfs_create,
+	.create_legacy = cgfs_create_legacy,
+	.enter = cgfs_enter,
+	.chown = NULL,
+	.setup_limits = cgroupfs_setup_limits,
+	.get_cgroup = cgfs_get_cgroup,
+	.unfreeze_fromhandler = cgfs_unfreeze_fromhandler,
+	.destroy = cgfs_destroy,
+
+	/* hooks addressed by container name and lxcpath */
+	.get = lxc_cgroupfs_get,
+	.set = lxc_cgroupfs_set,
+	.attach = lxc_cgroupfs_attach,
+
+	.mount_cgroup = cgroupfs_mount_cgroup,
+};
+/*
+ * When the cgroup manager is to be used and has not been initialized yet,
+ * try to initialize it; if that fails, log it and make cgroupfs the active
+ * backend.  In every other case the active backend is left alone.
+ */
+static void init_cg_ops(void)
+{
+	if (!use_cgmanager || cgmanager_initialized)
+		return;
+
+	if (lxc_init_cgmanager())
+		return;
+
+	ERROR("Could not contact cgroup manager, falling back to cgroupfs");
+	active_cg_ops = &cgfs_ops;
+}
+
+/*
+ * These are the backend-independent cgroup handlers for container
+ * start and stop
+ */
+
+/*
+ * Let the active backend release the cgroup data it attached to the
+ * handler.  Nothing happens when the handler has no cgroup info or no
+ * backend is active.
+ */
+void cgroup_destroy(struct lxc_handler *handler)
+{
+	if (handler->cgroup_info && active_cg_ops)
+		active_cg_ops->destroy(handler);
+}
+
+/*
+ * Select the cgroup backend, give the handler a zeroed lxc_cgroup_info with
+ * its cgroup pattern filled in, and run the backend's init hook.
+ *
+ * Returns false when the allocation fails, otherwise whatever the backend's
+ * init hook returns.
+ */
+bool cgroup_init(struct lxc_handler *handler)
+{
+	init_cg_ops();
+
+	handler->cgroup_info = calloc(1, sizeof(struct lxc_cgroup_info));
+	if (!handler->cgroup_info)
+		return false;
+
+	/*
+	 * Root uses the system-wide cgroup pattern; everybody else just
+	 * creates a cgroup below the current one.  The latter is also the
+	 * fallback when the configuration yields no pattern at all.
+	 */
+	if (geteuid() == 0)
+		handler->cgroup_info->cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
+	if (!handler->cgroup_info->cgroup_pattern)
+		handler->cgroup_info->cgroup_pattern = "%n";
+
+	return active_cg_ops->init(handler);
+}
+
+/*
+ * Have the active backend create the container cgroups for all requested
+ * controllers; its verdict is returned as is.
+ */
+bool cgroup_create(struct lxc_handler *handler)
+{
+	bool created = active_cg_ops->create(handler);
+
+	return created;
+}
+
+/*
+ * Have the active backend enter the container init into its new cgroups
+ * for all requested controllers; its verdict is returned as is.
+ */
+bool cgroup_enter(struct lxc_handler *handler)
+{
+	bool entered = active_cg_ops->enter(handler);
+
+	return entered;
+}
+
+/*
+ * Run the active backend's create_legacy hook.  A backend without such a
+ * hook counts as success.
+ */
+bool cgroup_create_legacy(struct lxc_handler *handler)
+{
+	if (!active_cg_ops->create_legacy)
+		return true;
+
+	return active_cg_ops->create_legacy(handler);
+}
+
+/*
+ * Ask the active backend for the handler's cgroup in 'subsystem'; the
+ * backend's answer is passed through unchanged.
+ */
+char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem)
+{
+	char *cgroup;
+
+	cgroup = active_cg_ops->get_cgroup(handler, subsystem);
+	return cgroup;
+}
+
+/*
+ * Set the cgroup file 'filename' to 'value' for the container 'name' under
+ * 'lxcpath' through the active backend, whose result is returned.
+ */
+int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
+{
+	int ret;
+
+	/* must run first: it may switch the active backend */
+	init_cg_ops();
+
+	ret = active_cg_ops->set(filename, value, name, lxcpath);
+	return ret;
+}
+
+/*
+ * Read the cgroup file 'filename' of the container 'name' under 'lxcpath'
+ * into 'value' (capacity 'len') through the active backend, whose result
+ * is returned.
+ */
+int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
+{
+	int ret;
+
+	/* must run first: it may switch the active backend */
+	init_cg_ops();
+
+	ret = active_cg_ops->get(filename, value, len, name, lxcpath);
+	return ret;
+}
+
+/*
+ * Run the active backend's unfreeze_fromhandler hook for the handler and
+ * return its verdict.
+ */
+bool lxc_unfreeze_fromhandler(struct lxc_handler *handler)
+{
+	bool thawed = active_cg_ops->unfreeze_fromhandler(handler);
+
+	return thawed;
+}
+
+/*
+ * Run the active backend's setup_limits hook, forwarding 'with_devices',
+ * and return its verdict.
+ */
+bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
+{
+	bool applied = active_cg_ops->setup_limits(handler, with_devices);
+
+	return applied;
+}
+
+/*
+ * Run the active backend's chown hook.  A backend without such a hook
+ * counts as success.
+ */
+bool cgroup_chown(struct lxc_handler *handler)
+{
+	if (!active_cg_ops->chown)
+		return true;
+
+	return active_cg_ops->chown(handler);
+}
+
+/*
+ * Attach the process 'pid' to the cgroups of the container 'name' under
+ * 'lxcpath' through the active backend, whose verdict is returned.
+ */
+bool lxc_cgroup_attach(const char *name, const char *lxcpath, pid_t pid)
+{
+	bool attached;
+
+	/* must run first: it may switch the active backend */
+	init_cg_ops();
+
+	attached = active_cg_ops->attach(name, lxcpath, pid);
+	return attached;
+}
+
+/*
+ * Run the active backend's mount_cgroup hook with 'root', 'cgroup_info' and
+ * 'type' forwarded unchanged, and return its result.
+ */
+bool lxc_setup_mount_cgroup(const char *root,
+		struct lxc_cgroup_info *cgroup_info, int type)
+{
+	bool mounted = active_cg_ops->mount_cgroup(root, cgroup_info, type);
+
+	return mounted;
+}
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
deleted file mode 100644
index c23b784..0000000
--- a/src/lxc/cgroup.c
+++ /dev/null
@@ -1,2461 +0,0 @@
-/*
- * lxc: linux Container library
- *
- * (C) Copyright IBM Corp. 2007, 2008
- *
- * Authors:
- * Daniel Lezcano <daniel.lezcano at free.fr>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "config.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-#include <string.h>
-#include <dirent.h>
-#include <fcntl.h>
-#include <ctype.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/param.h>
-#include <sys/inotify.h>
-#include <sys/mount.h>
-#include <netinet/in.h>
-#include <net/if.h>
-
-#include "error.h"
-#include "commands.h"
-#include "list.h"
-#include "conf.h"
-#include "utils.h"
-#include "bdev.h"
-#include "log.h"
-#include "cgroup.h"
-#include "start.h"
-#include "state.h"
-
-#if IS_BIONIC
-#include <../include/lxcmntent.h>
-#else
-#include <mntent.h>
-#endif
-
-lxc_log_define(lxc_cgroup, lxc);
-
-static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
-static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
-static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
-static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
-static bool is_valid_cgroup(const char *name);
-static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
-static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
-static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
-static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
-static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
-static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
-static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
-static int do_setup_cgroup_limits(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
-static int cgroup_recursive_task_count(const char *cgroup_path);
-static int count_lines(const char *fn);
-static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
-static bool init_cpuset_if_needed(struct cgroup_mount_point *mp, const char *path);
-
-static struct cgroup_ops cgfs_ops;
-struct cgroup_ops *active_cg_ops = &cgfs_ops;
-static void init_cg_ops(void);
-
-#ifdef HAVE_CGMANAGER
-/* this needs to be mutexed for api use */
-extern bool cgmanager_initialized;
-extern bool use_cgmanager;
-extern bool lxc_init_cgmanager(void);
-#else
-static bool cgmanager_initialized = false;
-static bool use_cgmanager = false;
-static bool lxc_init_cgmanager(void) { return false; }
-#endif
-
-static int cgroup_rmdir(char *dirname)
-{
-	struct dirent dirent, *direntp;
-	int saved_errno = 0;
-	DIR *dir;
-	int ret, failed=0;
-	char pathname[MAXPATHLEN];
-
-	dir = opendir(dirname);
-	if (!dir) {
-		ERROR("%s: failed to open %s", __func__, dirname);
-		return -1;
-	}
-
-	while (!readdir_r(dir, &dirent, &direntp)) {
-		struct stat mystat;
-		int rc;
-
-		if (!direntp)
-			break;
-
-		if (!strcmp(direntp->d_name, ".") ||
-		    !strcmp(direntp->d_name, ".."))
-			continue;
-
-		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
-		if (rc < 0 || rc >= MAXPATHLEN) {
-			ERROR("pathname too long");
-			failed=1;
-			if (!saved_errno)
-				saved_errno = -ENOMEM;
-			continue;
-		}
-		ret = lstat(pathname, &mystat);
-		if (ret) {
-			SYSERROR("%s: failed to stat %s", __func__, pathname);
-			failed=1;
-			if (!saved_errno)
-				saved_errno = errno;
-			continue;
-		}
-		if (S_ISDIR(mystat.st_mode)) {
-			if (cgroup_rmdir(pathname) < 0) {
-				if (!saved_errno)
-					saved_errno = errno;
-				failed=1;
-			}
-		}
-	}
-
-	if (rmdir(dirname) < 0) {
-		SYSERROR("%s: failed to delete %s", __func__, dirname);
-		if (!saved_errno)
-			saved_errno = errno;
-		failed=1;
-	}
-
-	ret = closedir(dir);
-	if (ret) {
-		SYSERROR("%s: failed to close directory %s", __func__, dirname);
-		if (!saved_errno)
-			saved_errno = errno;
-		failed=1;
-	}
-
-	errno = saved_errno;
-	return failed ? -1 : 0;
-}
-
-struct cgroup_meta_data *lxc_cgroup_load_meta()
-{
-	const char *cgroup_use = NULL;
-	char **cgroup_use_list = NULL;
-	struct cgroup_meta_data *md = NULL;
-	int saved_errno;
-
-	errno = 0;
-	cgroup_use = lxc_global_config_value("lxc.cgroup.use");
-	if (!cgroup_use && errno != 0)
-		return NULL;
-	if (cgroup_use) {
-		cgroup_use_list = lxc_string_split_and_trim(cgroup_use, ',');
-		if (!cgroup_use_list)
-			return NULL;
-	}
-
-	md = lxc_cgroup_load_meta2((const char **)cgroup_use_list);
-	saved_errno = errno;
-	lxc_free_array((void **)cgroup_use_list, free);
-	errno = saved_errno;
-	return md;
-}
-
-/* Step 1: determine all kernel subsystems */
-bool find_cgroup_subsystems(char ***kernel_subsystems)
-{
-	FILE *proc_cgroups;
-	bool bret = false;
-	char *line = NULL;
-	size_t sz = 0;
-	size_t kernel_subsystems_count = 0;
-	size_t kernel_subsystems_capacity = 0;
-	int r;
-
-	proc_cgroups = fopen_cloexec("/proc/cgroups", "r");
-	if (!proc_cgroups)
-		return false;
-
-	while (getline(&line, &sz, proc_cgroups) != -1) {
-		char *tab1;
-		char *tab2;
-		int hierarchy_number;
-
-		if (line[0] == '#')
-			continue;
-		if (!line[0])
-			continue;
-
-		tab1 = strchr(line, '\t');
-		if (!tab1)
-			continue;
-		*tab1++ = '\0';
-		tab2 = strchr(tab1, '\t');
-		if (!tab2)
-			continue;
-		*tab2 = '\0';
-
-		tab2 = NULL;
-		hierarchy_number = strtoul(tab1, &tab2, 10);
-		if (!tab2 || *tab2)
-			continue;
-		(void)hierarchy_number;
-
-		r = lxc_grow_array((void ***)kernel_subsystems, &kernel_subsystems_capacity, kernel_subsystems_count + 1, 12);
-		if (r < 0)
-			goto out;
-		(*kernel_subsystems)[kernel_subsystems_count] = strdup(line);
-		if (!(*kernel_subsystems)[kernel_subsystems_count])
-			goto out;
-		kernel_subsystems_count++;
-	}
-	bret = true;
-
-out:
-	fclose(proc_cgroups);
-	free(line);
-	return bret;
-}
-
-/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
- *         since mount points don't specify hierarchy number and
- *         /proc/cgroups does not contain named hierarchies
- */
-static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
-	bool all_kernel_subsystems, bool all_named_subsystems,
-	const char **subsystem_whitelist)
-{
-	FILE *proc_self_cgroup;
-	char *line = NULL;
-	size_t sz = 0;
-	int r;
-	bool bret = false;
-	size_t hierarchy_capacity = 0;
-
-	proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
-	/* if for some reason (because of setns() and pid namespace for example),
-	 * /proc/self is not valid, we try /proc/1/cgroup... */
-	if (!proc_self_cgroup)
-		proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
-	if (!proc_self_cgroup)
-		return false;
-
-	while (getline(&line, &sz, proc_self_cgroup) != -1) {
-		/* file format: hierarchy:subsystems:group,
-		 * we only extract hierarchy and subsystems
-		 * here */
-		char *colon1;
-		char *colon2;
-		int hierarchy_number;
-		struct cgroup_hierarchy *h = NULL;
-		char **p;
-
-		if (!line[0])
-			continue;
-
-		colon1 = strchr(line, ':');
-		if (!colon1)
-			continue;
-		*colon1++ = '\0';
-		colon2 = strchr(colon1, ':');
-		if (!colon2)
-			continue;
-		*colon2 = '\0';
-
-		colon2 = NULL;
-		hierarchy_number = strtoul(line, &colon2, 10);
-		if (!colon2 || *colon2)
-			continue;
-
-		if (hierarchy_number > meta_data->maximum_hierarchy) {
-			/* lxc_grow_array will never shrink, so even if we find a lower
-			* hierarchy number here, the array will never be smaller
-			*/
-			r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
-			if (r < 0)
-				goto out;
-
-			meta_data->maximum_hierarchy = hierarchy_number;
-		}
-
-		/* this shouldn't happen, we had this already */
-		if (meta_data->hierarchies[hierarchy_number])
-			goto out;
-
-		h = calloc(1, sizeof(struct cgroup_hierarchy));
-		if (!h)
-			goto out;
-
-		meta_data->hierarchies[hierarchy_number] = h;
-
-		h->index = hierarchy_number;
-		h->subsystems = lxc_string_split_and_trim(colon1, ',');
-		if (!h->subsystems)
-			goto out;
-		/* see if this hierarchy should be considered */
-		if (!all_kernel_subsystems || !all_named_subsystems) {
-			for (p = h->subsystems; *p; p++) {
-				if (!strncmp(*p, "name=", 5)) {
-					if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
-						h->used = true;
-						break;
-					}
-				} else {
-					if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
-						h->used = true;
-						break;
-					}
-				}
-			}
-		} else {
-			/* we want all hierarchy anyway */
-			h->used = true;
-		}
-	}
-	bret = true;
-
-out:
-	fclose(proc_self_cgroup);
-	free(line);
-	return bret;
-}
-
-/* Step 3: determine all mount points of each hierarchy */
-static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
-{
-	bool bret = false;
-	FILE *proc_self_mountinfo;
-	char *line = NULL;
-	size_t sz = 0;
-	char **tokens = NULL;
-	size_t mount_point_count = 0;
-	size_t mount_point_capacity = 0;
-	size_t token_capacity = 0;
-	int r;
-
-	proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
-	/* if for some reason (because of setns() and pid namespace for example),
-	 * /proc/self is not valid, we try /proc/1/cgroup... */
-	if (!proc_self_mountinfo)
-		proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
-	if (!proc_self_mountinfo)
-		return false;
-
-	while (getline(&line, &sz, proc_self_mountinfo) != -1) {
-		char *token, *line_tok, *saveptr = NULL;
-		size_t i, j, k;
-		struct cgroup_mount_point *mount_point;
-		struct cgroup_hierarchy *h;
-		char **subsystems;
-
-		if (line[0] && line[strlen(line) - 1] == '\n')
-			line[strlen(line) - 1] = '\0';
-
-		for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
-			r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
-			if (r < 0)
-				goto out;
-			tokens[i++] = token;
-		}
-
-		/* layout of /proc/self/mountinfo:
-		 *      0: id
-		 *      1: parent id
-		 *      2: device major:minor
-		 *      3: mount prefix
-		 *      4: mount point
-		 *      5: per-mount options
-		 *    [optional X]: additional data
-		 *    X+7: "-"
-		 *    X+8: type
-		 *    X+9: source
-		 *    X+10: per-superblock options
-		 */
-		for (j = 6; j < i && tokens[j]; j++)
-			if (!strcmp(tokens[j], "-"))
-				break;
-
-		/* could not find separator */
-		if (j >= i || !tokens[j])
-			continue;
-		/* there should be exactly three fields after
-		 * the separator
-		 */
-		if (i != j + 4)
-			continue;
-
-		/* not a cgroup filesystem */
-		if (strcmp(tokens[j + 1], "cgroup") != 0)
-			continue;
-
-		subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
-		if (!subsystems)
-			goto out;
-
-		h = NULL;
-		for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
-			if (meta_data->hierarchies[k] &&
-			    meta_data->hierarchies[k]->subsystems[0] &&
-			    lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
-				/* TODO: we could also check if the lists really match completely,
-				 *       just to have an additional sanity check */
-				h = meta_data->hierarchies[k];
-				break;
-			}
-		}
-		lxc_free_array((void **)subsystems, free);
-
-		r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
-		if (r < 0)
-			goto out;
-
-		/* create mount point object */
-		mount_point = calloc(1, sizeof(*mount_point));
-		if (!mount_point)
-			goto out;
-
-		meta_data->mount_points[mount_point_count++] = mount_point;
-
-		mount_point->hierarchy = h;
-		mount_point->mount_point = strdup(tokens[4]);
-		mount_point->mount_prefix = strdup(tokens[3]);
-		if (!mount_point->mount_point || !mount_point->mount_prefix)
-			goto out;
-		mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
-
-		if (!strcmp(mount_point->mount_prefix, "/")) {
-			if (mount_point->read_only) {
-				if (!h->ro_absolute_mount_point)
-					h->ro_absolute_mount_point = mount_point;
-			} else {
-				if (!h->rw_absolute_mount_point)
-					h->rw_absolute_mount_point = mount_point;
-			}
-		}
-
-		k = lxc_array_len((void **)h->all_mount_points);
-		r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
-		if (r < 0)
-			goto out;
-		h->all_mount_points[k] = mount_point;
-	}
-	bret = true;
-
-out:
-	fclose(proc_self_mountinfo);
-	free(tokens);
-	free(line);
-	return bret;
-}
-
-struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
-{
-	bool all_kernel_subsystems = true;
-	bool all_named_subsystems = false;
-	struct cgroup_meta_data *meta_data = NULL;
-	char **kernel_subsystems = NULL;
-	int saved_errno = 0;
-
-	/* if the subsystem whitelist is not specified, include all
-	 * hierarchies that contain kernel subsystems by default but
-	 * no hierarchies that only contain named subsystems
-	 *
-	 * if it is specified, the specifier @all will select all
-	 * hierarchies, @kernel will select all hierarchies with
-	 * kernel subsystems and @named will select all named
-	 * hierarchies
-	 */
-	all_kernel_subsystems = subsystem_whitelist ?
-		(lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
-		true;
-	all_named_subsystems = subsystem_whitelist ?
-		(lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
-		false;
-
-	meta_data = calloc(1, sizeof(struct cgroup_meta_data));
-	if (!meta_data)
-		return NULL;
-	meta_data->ref = 1;
-
-	if (!find_cgroup_subsystems(&kernel_subsystems))
-		goto out_error;
-
-	if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
-				all_named_subsystems, subsystem_whitelist))
-		goto out_error;
-
-	if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
-		goto out_error;
-
-	/* oops, we couldn't find anything */
-	if (!meta_data->hierarchies || !meta_data->mount_points) {
-		errno = EINVAL;
-		goto out_error;
-	}
-
-	lxc_free_array((void **)kernel_subsystems, free);
-	return meta_data;
-
-out_error:
-	saved_errno = errno;
-	lxc_free_array((void **)kernel_subsystems, free);
-	lxc_cgroup_put_meta(meta_data);
-	errno = saved_errno;
-	return NULL;
-}
-
-struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
-{
-	meta_data->ref++;
-	return meta_data;
-}
-
-struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
-{
-	size_t i;
-	if (!meta_data)
-		return NULL;
-	if (--meta_data->ref > 0)
-		return meta_data;
-	lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
-	if (meta_data->hierarchies) {
-		for (i = 0; i <= meta_data->maximum_hierarchy; i++)
-			lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
-	}
-	free(meta_data->hierarchies);
-	free(meta_data);
-	return NULL;
-}
-
-struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
-{
-	size_t i;
-	for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
-		struct cgroup_hierarchy *h = meta_data->hierarchies[i];
-		if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
-			return h;
-	}
-	return NULL;
-}
-
-struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
-{
-	struct cgroup_mount_point **mps;
-	struct cgroup_mount_point *current_result = NULL;
-	ssize_t quality = -1;
-
-	/* trivial case */
-	if (hierarchy->rw_absolute_mount_point)
-		return hierarchy->rw_absolute_mount_point;
-	if (!should_be_writable && hierarchy->ro_absolute_mount_point)
-		return hierarchy->ro_absolute_mount_point;
-
-	for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
-		struct cgroup_mount_point *mp = *mps;
-		size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
-
-		if (prefix_len == 1 && mp->mount_prefix[0] == '/')
-			prefix_len = 0;
-
-		if (should_be_writable && mp->read_only)
-			continue;
-
-		if (!prefix_len ||
-		    (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
-		     (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
-			/* search for the best quality match, i.e. the match with the
-			 * shortest prefix where this group is still contained
-			 */
-			if (quality == -1 || prefix_len < quality) {
-				current_result = mp;
-				quality = prefix_len;
-			}
-		}
-	}
-
-	if (!current_result)
-		errno = ENOENT;
-	return current_result;
-}
-
-char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
-{
-	struct cgroup_meta_data *meta_data;
-	struct cgroup_hierarchy *h;
-	struct cgroup_mount_point *mp;
-	char *result;
-	int saved_errno;
-
-	meta_data = lxc_cgroup_load_meta();
-	if (!meta_data)
-		return NULL;
-
-	h = lxc_cgroup_find_hierarchy(meta_data, subsystem);
-	if (!h)
-		goto out_error;
-
-	mp = lxc_cgroup_find_mount_point(h, group, should_be_writable);
-	if (!mp)
-		goto out_error;
-
-	result = cgroup_to_absolute_path(mp, group, suffix);
-	if (!result)
-		goto out_error;
-
-	lxc_cgroup_put_meta(meta_data);
-	return result;
-
-out_error:
-	saved_errno = errno;
-	lxc_cgroup_put_meta(meta_data);
-	errno = saved_errno;
-	return NULL;
-}
-
-struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
-{
-	char pid_buf[32];
-	snprintf(pid_buf, 32, "/proc/%lu/cgroup", (unsigned long)pid);
-	return lxc_cgroup_process_info_getx(pid_buf, meta);
-}
-
-struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
-{
-	return lxc_cgroup_process_info_get(1, meta);
-}
-
-struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
-{
-	struct cgroup_process_info *i;
-	i = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
-	if (!i)
-		i = lxc_cgroup_process_info_get(getpid(), meta);
-	return i;
-}
-
-/*
- * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
- * is already in a new cgroup named after the pid.  'mnt' is passed in as
- * the full current cgroup.  Say that is /sys/fs/cgroup/lxc/2975 and the container
- * name is c1. .  We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1,
- * and return the string /sys/fs/cgroup/lxc/c1.
- */
-static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
-{
-	char *dir, *fulloldpath;
-	char *newname, *fullnewpath;
-	int len, newlen, ret;
-
-	/*
-	 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
-	 * name is c1,
-	 * dir: /ab
-	 * fulloldpath = /cgroup/ab/2375
-	 * fullnewpath = /cgroup/ab/c1
-	 * newname = /ab/c1
-	 */
-	dir = alloca(strlen(oldname) + 1);
-	strcpy(dir, oldname);
-
-	len = strlen(oldname) + strlen(mountpath) + 22;
-	fulloldpath = alloca(len);
-	ret = snprintf(fulloldpath, len, "%s/%s/%ld", mountpath, oldname, (unsigned long)pid);
-	if (ret < 0 || ret >= len)
-		return NULL;
-
-	len = strlen(dir) + strlen(name) + 2;
-	newname = malloc(len);
-	if (!newname) {
-		SYSERROR("Out of memory");
-		return NULL;
-	}
-	ret = snprintf(newname, len, "%s/%s", dir, name);
-	if (ret < 0 || ret >= len) {
-		free(newname);
-		return NULL;
-	}
-
-	newlen = strlen(mountpath) + len + 2;
-	fullnewpath = alloca(newlen);
-	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
-	if (ret < 0 || ret >= newlen) {
-		free(newname);
-		return NULL;
-	}
-
-	if (access(fullnewpath, F_OK) == 0) {
-		if (rmdir(fullnewpath) != 0) {
-			SYSERROR("container cgroup %s already exists.", fullnewpath);
-			free(newname);
-			return NULL;
-		}
-	}
-	if (rename(fulloldpath, fullnewpath)) {
-		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
-		free(newname);
-		return NULL;
-	}
-
-	DEBUG("'%s' renamed to '%s'", oldname, newname);
-
-	return newname;
-}
-
-/* create a new cgroup */
-struct cgroup_process_info *lxc_cgroupfs_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
-{
-	char **cgroup_path_components = NULL;
-	char **p = NULL;
-	char *path_so_far = NULL;
-	char **new_cgroup_paths = NULL;
-	char **new_cgroup_paths_sub = NULL;
-	struct cgroup_mount_point *mp;
-	struct cgroup_hierarchy *h;
-	struct cgroup_process_info *base_info = NULL;
-	struct cgroup_process_info *info_ptr;
-	int saved_errno;
-	int r;
-	unsigned suffix = 0;
-	bool had_sub_pattern = false;
-	size_t i;
-
-	if (!is_valid_cgroup(name)) {
-		ERROR("Invalid cgroup name: '%s'", name);
-		errno = EINVAL;
-		return NULL;
-	}
-
-	if (!strstr(path_pattern, "%n")) {
-		ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
-		errno = EINVAL;
-		return NULL;
-	}
-
-	/* we will modify the result of this operation directly,
-	 * so we don't have to copy the data structure
-	 */
-	base_info = (path_pattern[0] == '/') ?
-		lxc_cgroup_process_info_get_init(meta_data) :
-		lxc_cgroup_process_info_get_self(meta_data);
-	if (!base_info)
-		return NULL;
-
-	new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
-	if (!new_cgroup_paths)
-		goto out_initial_error;
-
-	new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
-	if (!new_cgroup_paths_sub)
-		goto out_initial_error;
-
-	/* find mount points we can use */
-	for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
-		h = info_ptr->hierarchy;
-		mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
-		if (!mp) {
-			ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
-			goto out_initial_error;
-		}
-		info_ptr->designated_mount_point = mp;
-
-		if (lxc_string_in_array("ns", (const char **)h->subsystems))
-			continue;
-		if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
-			ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
-			goto out_initial_error;
-		}
-	}
-
-	/* normalize the path */
-	cgroup_path_components = lxc_normalize_path(path_pattern);
-	if (!cgroup_path_components)
-		goto out_initial_error;
-
-	/* go through the path components to see if we can create them */
-	for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
-		/* we only want to create the same component with -1, -2, etc.
-		 * if the component contains the container name itself, otherwise
-		 * it's not an error if it already exists
-		 */
-		char *p_eff = *p ? *p : (char *)sub_pattern;
-		bool contains_name = strstr(p_eff, "%n");
-		char *current_component = NULL;
-		char *current_subpath = NULL;
-		char *current_entire_path = NULL;
-		char *parts[3];
-		size_t j = 0;
-		i = 0;
-
-		/* if we are processing the subpattern, we want to make sure
-		 * loop is ended the next time around
-		 */
-		if (!*p) {
-			had_sub_pattern = true;
-			p--;
-		}
-
-		goto find_name_on_this_level;
-	
-	cleanup_name_on_this_level:
-		/* This is reached if we found a name clash.
-		 * In that case, remove the cgroup from all previous hierarchies
-		 */
-		for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
-			r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
-			if (r < 0)
-				WARN("could not clean up cgroup we created when trying to create container");
-			free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
-			info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
-		}
-		if (current_component != current_subpath)
-			free(current_subpath);
-		if (current_component != p_eff)
-			free(current_component);
-		current_component = current_subpath = NULL;
-		/* try again with another suffix */
-		++suffix;
-	
-	find_name_on_this_level:
-		/* determine name of the path component we should create */
-		if (contains_name && suffix > 0) {
-			char *buf = calloc(strlen(name) + 32, 1);
-			if (!buf)
-				goto out_initial_error;
-			snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
-			current_component = lxc_string_replace("%n", buf, p_eff);
-			free(buf);
-		} else {
-			current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
-		}
-		parts[0] = path_so_far;
-		parts[1] = current_component;
-		parts[2] = NULL;
-		current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
-
-		/* Now go through each hierarchy and try to create the
-		 * corresponding cgroup
-		 */
-		for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
-			char *parts2[3];
-
-			if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
-				continue;
-			current_entire_path = NULL;
-
-			parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
-			parts2[1] = current_subpath;
-			parts2[2] = NULL;
-			current_entire_path = lxc_string_join("/", (const char **)parts2, false);
-
-			if (!*p) {
-				/* we are processing the subpath, so only update that one */
-				free(new_cgroup_paths_sub[i]);
-				new_cgroup_paths_sub[i] = strdup(current_entire_path);
-				if (!new_cgroup_paths_sub[i])
-					goto cleanup_from_error;
-			} else {
-				/* remember which path was used on this controller */
-				free(new_cgroup_paths[i]);
-				new_cgroup_paths[i] = strdup(current_entire_path);
-				if (!new_cgroup_paths[i])
-					goto cleanup_from_error;
-			}
-
-			r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
-			if (r < 0 && errno == EEXIST && contains_name) {
-				/* name clash => try new name with new suffix */
-				free(current_entire_path);
-				current_entire_path = NULL;
-				goto cleanup_name_on_this_level;
-			} else if (r < 0 && errno != EEXIST) {
-				SYSERROR("Could not create cgroup %s", current_entire_path);
-				goto cleanup_from_error;
-			} else if (r == 0) {
-				/* successfully created */
-				r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
-				if (r < 0)
-					goto cleanup_from_error;
-				if (!init_cpuset_if_needed(info_ptr->designated_mount_point, current_entire_path)) {
-					ERROR("Failed to initialize cpuset in new '%s'.", current_entire_path);
-					goto cleanup_from_error;
-				}
-				info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
-			} else {
-				/* if we didn't create the cgroup, then we have to make sure that
-				 * further cgroups will be created properly
-				 */
-				if (handle_cgroup_settings(info_ptr->designated_mount_point, info_ptr->cgroup_path) < 0) {
-					ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
-					goto cleanup_from_error;
-				}
-				if (!init_cpuset_if_needed(info_ptr->designated_mount_point, info_ptr->cgroup_path)) {
-					ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr->cgroup_path);
-					goto cleanup_from_error;
-				}
-
-				/* already existed but path component of pattern didn't contain '%n',
-				 * so this is not an error; but then we don't need current_entire_path
-				 * anymore...
-				 */
-				free(current_entire_path);
-				current_entire_path = NULL;
-			}
-		}
-
-		/* save path so far */
-		free(path_so_far);
-		path_so_far = strdup(current_subpath);
-		if (!path_so_far)
-			goto cleanup_from_error;
-
-		/* cleanup */
-		if (current_component != current_subpath)
-			free(current_subpath);
-		if (current_component != p_eff)
-			free(current_component);
-		current_component = current_subpath = NULL;
-		continue;
-	
-	cleanup_from_error:
-		/* called if an error occured in the loop, so we
-		 * do some additional cleanup here
-		 */
-		saved_errno = errno;
-		if (current_component != current_subpath)
-			free(current_subpath);
-		if (current_component != p_eff)
-			free(current_component);
-		free(current_entire_path);
-		errno = saved_errno;
-		goto out_initial_error;
-	}
-
-	/* we're done, now update the paths */
-	for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
-		/* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
-		 * will take care of it
-		 * Since we do a continue in above loop, new_cgroup_paths[i] is
-		 * unset anyway, as is new_cgroup_paths_sub[i]
-		 */
-		if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
-			continue;
-		free(info_ptr->cgroup_path);
-		info_ptr->cgroup_path = new_cgroup_paths[i];
-		info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
-	}
-	/* don't use lxc_free_array since we used the array members
-	 * to store them in our result...
-	 */
-	free(new_cgroup_paths);
-	free(new_cgroup_paths_sub);
-	free(path_so_far);
-	lxc_free_array((void **)cgroup_path_components, free);
-	return base_info;
-
-out_initial_error:
-	saved_errno = errno;
-	free(path_so_far);
-	lxc_cgroup_process_info_free_and_remove(base_info);
-	lxc_free_array((void **)new_cgroup_paths, free);
-	lxc_free_array((void **)new_cgroup_paths_sub, free);
-	lxc_free_array((void **)cgroup_path_components, free);
-	errno = saved_errno;
-	return NULL;
-}
-
-/*
- * Take care of hierarchies that have the legacy 'ns' subsystem attached.
- *
- * For each such entry in @base_info, cgroup_rename_nsgroup() is asked to
- * rename the cgroup belonging to @pid to @name; the returned path replaces
- * the entry's cgroup_path and a copy of it is appended to created_paths.
- *
- * Returns 0 on success, -1 as soon as any step fails.
- */
-int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
-{
-	struct cgroup_process_info *cur;
-
-	for (cur = base_info; cur; cur = cur->next) {
-		char *renamed;
-		char *recorded;
-
-		/* hierarchies without the 'ns' subsystem are not handled here */
-		if (!lxc_string_in_array("ns", (const char **)cur->hierarchy->subsystems))
-			continue;
-
-		/*
-		 * For any path which has ns cgroup mounted, handler->pid is already
-		 * moved into a container called '%d % (handler->pid)'.  Rename it to
-		 * the cgroup name and record that.
-		 */
-		renamed = cgroup_rename_nsgroup((const char *)cur->designated_mount_point->mount_point,
-				cur->cgroup_path, pid, name);
-		if (!renamed)
-			return -1;
-		free(cur->cgroup_path);
-		cur->cgroup_path = renamed;
-
-		/* make room for one more entry, then store a private copy */
-		if (lxc_grow_array((void ***)&cur->created_paths, &cur->created_paths_capacity, cur->created_paths_count + 1, 8) < 0)
-			return -1;
-		recorded = strdup(renamed);
-		if (!recorded)
-			return -1;
-		cur->created_paths[cur->created_paths_count++] = recorded;
-	}
-
-	return 0;
-}
-
-/*
- * Get the cgroup membership of a given container.
- *
- * For every used hierarchy in @meta_data the container's cgroup path is
- * queried through the command interface, using the hierarchy's first
- * subsystem name. One list entry per hierarchy is built, in hierarchy order.
- *
- * Returns the head of the list, or NULL (errno preserved) if a lookup or
- * an allocation fails.
- */
-struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
-{
-	struct cgroup_process_info *head = NULL;
-	struct cgroup_process_info **tail = &head;
-	struct cgroup_process_info *node = NULL;
-	char *cgpath = NULL;
-	int saved_errno = 0;
-	size_t idx;
-
-	for (idx = 0; idx <= meta_data->maximum_hierarchy; idx++) {
-		struct cgroup_hierarchy *hier = meta_data->hierarchies[idx];
-
-		if (!hier || !hier->used)
-			continue;
-
-		/* use the command interface to look for the cgroup */
-		cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, hier->subsystems[0]);
-		if (!cgpath)
-			goto fail;
-
-		node = calloc(1, sizeof(*node));
-		if (!node)
-			goto fail;
-
-		node->meta_ref = lxc_cgroup_get_meta(meta_data);
-		node->hierarchy = hier;
-		/* the node now owns the path string */
-		node->cgroup_path = cgpath;
-		cgpath = NULL;
-
-		/* it is not an error if we don't find anything here,
-		 * it is up to the caller to decide what to do in that
-		 * case */
-		node->designated_mount_point = lxc_cgroup_find_mount_point(hier, node->cgroup_path, true);
-
-		/* append to the end of the list */
-		*tail = node;
-		tail = &node->next;
-		node = NULL;
-	}
-
-	return head;
-
-fail:
-	saved_errno = errno;
-	free(cgpath);
-	lxc_cgroup_process_info_free(head);
-	lxc_cgroup_process_info_free(node);
-	errno = saved_errno;
-	return NULL;
-}
-
-/*
- * Move a process into the cgroups specified by the membership list.
- *
- * For every entry in @info the pid is written to the "tasks" file of the
- * entry's cgroup_path, or of cgroup_path_sub when @enter_sub is set and a
- * sub path exists. A missing designated mount point is looked up and cached
- * in the entry first.
- *
- * Returns 0 on success, -1 on the first failure.
- */
-int lxc_cgroupfs_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
-{
-	struct cgroup_process_info *cur;
-	char pid_str[32];
-
-	snprintf(pid_str, sizeof(pid_str), "%lu", (unsigned long)pid);
-
-	for (cur = info; cur; cur = cur->next) {
-		char *target = cur->cgroup_path;
-		char *tasks_file;
-		int rc;
-
-		if (enter_sub && cur->cgroup_path_sub)
-			target = cur->cgroup_path_sub;
-
-		if (!cur->designated_mount_point) {
-			cur->designated_mount_point = lxc_cgroup_find_mount_point(cur->hierarchy, target, true);
-			if (!cur->designated_mount_point) {
-				SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, target);
-				return -1;
-			}
-		}
-
-		tasks_file = cgroup_to_absolute_path(cur->designated_mount_point, target, "/tasks");
-		if (!tasks_file) {
-			SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, target);
-			return -1;
-		}
-
-		rc = lxc_write_to_file(tasks_file, pid_str, strlen(pid_str), false);
-		free(tasks_file);
-		if (rc < 0) {
-			SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, target);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Free a list of process membership information entries.
- *
- * Every entry drops its meta data reference and releases its paths, its
- * created_paths array and itself. A NULL @info is a no-op.
- */
-void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
-{
-	while (info) {
-		/* remember the successor before the node goes away */
-		struct cgroup_process_info *following = info->next;
-
-		lxc_cgroup_put_meta(info->meta_ref);
-		free(info->cgroup_path);
-		free(info->cgroup_path_sub);
-		lxc_free_array((void **)info->created_paths, free);
-		free(info);
-
-		info = following;
-	}
-}
-
-/*
- * Free process membership information and remove cgroups that were created.
- *
- * For each entry in the list the entry's cgroup_path is removed recursively
- * below its mount point (looked up if no designated one is set); the result
- * of that removal is deliberately ignored. Afterwards all strings stored in
- * created_paths are freed, last entry first, and the entry itself is
- * released. A NULL @info is a no-op.
- */
-void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
-{
-	struct cgroup_process_info *next;
-	if (!info)
-		return;
-	next = info->next;
-	{
-		struct cgroup_mount_point *mp = info->designated_mount_point;
-		if (!mp)
-			mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
-		if (mp)
-			/* ignore return value here, perhaps we created the
-			 * '/lxc' cgroup in this container but another container
-			 * is still running (for example)
-			 */
-			(void)remove_cgroup(mp, info->cgroup_path, true);
-	}
-	if (info->created_paths) {
-		size_t count = 0;
-
-		/* the array is NULL-terminated: find its length first */
-		while (info->created_paths[count])
-			count++;
-		/* walk backwards by index so that no pointer before the
-		 * start of the array is ever formed
-		 */
-		while (count > 0)
-			free(info->created_paths[--count]);
-	}
-	free(info->created_paths);
-	lxc_cgroup_put_meta(info->meta_ref);
-	free(info->cgroup_path);
-	free(info->cgroup_path_sub);
-	free(info);
-	lxc_cgroup_process_info_free_and_remove(next);
-}
-
-/*
- * Look up the cgroup path recorded in @handler for the hierarchy that
- * carries @subsystem.
- *
- * Returns the stored string itself (not a copy), or NULL if no hierarchy
- * in the handler's list has that subsystem.
- */
-static char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-	struct cgroup_process_info *match = find_info_for_subsystem(data->info, subsystem);
-
-	return match ? match->cgroup_path : NULL;
-}
-
-/*
- * Ask the running container @name in @lxcpath, through the command
- * interface, for its cgroup path in the hierarchy of @subsystem.
- *
- * Returns whatever lxc_cmd_get_cgroup_path() returns; lxc_cgroup_path_get()
- * treats a NULL result as failure and free()s a non-NULL one.
- */
-char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
-{
-	return lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
-}
-
-/*
- * Build the absolute filesystem path of the cgroup recorded in @handler
- * for the hierarchy that carries @subsystem.
- *
- * The entry's designated mount point is used when set, otherwise a mount
- * point is looked up. Returns a newly allocated string the caller must
- * free(), or NULL if no matching hierarchy or mount point is found.
- */
-char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-	struct cgroup_process_info *match;
-	struct cgroup_mount_point *mnt;
-
-	match = find_info_for_subsystem(data->info, subsystem);
-	if (!match)
-		return NULL;
-
-	mnt = match->designated_mount_point;
-	if (!mnt) {
-		mnt = lxc_cgroup_find_mount_point(match->hierarchy, match->cgroup_path, true);
-		if (!mnt)
-			return NULL;
-	}
-
-	return cgroup_to_absolute_path(mnt, match->cgroup_path, NULL);
-}
-
-/*
- * Build the absolute filesystem path of the cgroup that container @name
- * (running in @lxcpath) uses in the hierarchy that carries @subsystem.
- *
- * Meta data is loaded and the container's membership list is fetched just
- * for this call; both are released before returning. Returns a newly
- * allocated string the caller must free(), or NULL on any failure.
- */
-char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
-{
-	struct cgroup_meta_data *meta_data;
-	struct cgroup_process_info *list;
-	char *abs = NULL;
-
-	meta_data = lxc_cgroup_load_meta();
-	if (!meta_data)
-		return NULL;
-
-	list = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
-	if (list) {
-		struct cgroup_process_info *match = find_info_for_subsystem(list, subsystem);
-
-		if (match) {
-			struct cgroup_mount_point *mnt = match->designated_mount_point;
-
-			if (!mnt)
-				mnt = lxc_cgroup_find_mount_point(match->hierarchy, match->cgroup_path, true);
-			if (mnt)
-				abs = cgroup_to_absolute_path(mnt, match->cgroup_path, NULL);
-		}
-		lxc_cgroup_process_info_free(list);
-	}
-
-	lxc_cgroup_put_meta(meta_data);
-	return abs;
-}
-
-/*
- * Write @value to the cgroup file @filename (e.g. "freezer.state") of the
- * container described by @handler.
- *
- * The subsystem is the part of @filename before the first '.'.
- * Returns the result of do_cgroup_set(), or -1 if the cgroup directory for
- * that subsystem cannot be determined.
- */
-int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
-{
-	char *subsystem = alloca(strlen(filename) + 1);
-	char *dot, *dir;
-	int rc;
-
-	/* cut the copy at the first dot to obtain the subsystem name */
-	strcpy(subsystem, filename);
-	dot = strchr(subsystem, '.');
-	if (dot)
-		*dot = '\0';
-
-	dir = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
-	if (!dir)
-		return -1;
-
-	rc = do_cgroup_set(dir, filename, value);
-	free(dir);
-	return rc;
-}
-
-/*
- * Read the cgroup file @filename (e.g. "freezer.state") of the container
- * described by @handler into @value, which holds @len bytes.
- *
- * The subsystem is the part of @filename before the first '.'.
- * Returns the result of do_cgroup_get(), or -1 if the cgroup directory for
- * that subsystem cannot be determined.
- */
-int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
-{
-	char *subsystem = alloca(strlen(filename) + 1);
-	char *dot, *dir;
-	int rc;
-
-	/* cut the copy at the first dot to obtain the subsystem name */
-	strcpy(subsystem, filename);
-	dot = strchr(subsystem, '.');
-	if (dot)
-		*dot = '\0';
-
-	dir = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
-	if (!dir)
-		return -1;
-
-	rc = do_cgroup_get(dir, filename, value, len);
-	free(dir);
-	return rc;
-}
-
-/*
- * Write @value to the cgroup file @filename (e.g. "freezer.state") of the
- * container @name running in @lxcpath.
- *
- * The subsystem is the part of @filename before the first '.'.
- * Returns the result of do_cgroup_set(), or -1 if the cgroup directory for
- * that subsystem cannot be determined.
- */
-int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
-{
-	char *subsystem = alloca(strlen(filename) + 1);
-	char *dot, *dir;
-	int rc;
-
-	/* cut the copy at the first dot to obtain the subsystem name */
-	strcpy(subsystem, filename);
-	dot = strchr(subsystem, '.');
-	if (dot)
-		*dot = '\0';
-
-	dir = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
-	if (!dir)
-		return -1;
-
-	rc = do_cgroup_set(dir, filename, value);
-	free(dir);
-	return rc;
-}
-
-/*
- * Read the cgroup file @filename (e.g. "freezer.state") of the container
- * @name running in @lxcpath into @value, which holds @len bytes.
- *
- * The subsystem is the part of @filename before the first '.'.
- * Returns the result of do_cgroup_get(), or -1 if the cgroup directory for
- * that subsystem cannot be determined.
- */
-int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
-{
-	char *subsystem = alloca(strlen(filename) + 1);
-	char *dot, *dir;
-	int rc;
-
-	/* cut the copy at the first dot to obtain the subsystem name */
-	strcpy(subsystem, filename);
-	dot = strchr(subsystem, '.');
-	if (dot)
-		*dot = '\0';
-
-	dir = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
-	if (!dir)
-		return -1;
-
-	rc = do_cgroup_get(dir, filename, value, len);
-	free(dir);
-	return rc;
-}
-
-/*
- * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
- * file for a running container.
- *
- * @filename  : the file of interest (e.g. "freezer.state") or
- *              the subsystem name (e.g. "freezer") in which case
- *              the directory where the cgroup may be modified
- *              will be returned
- * @name      : name of container to connect to
- * @lxcpath   : the lxcpath in which the container is running
- *
- * This is the exported function, which determines cgpath from the
- * lxc-start of the @name container running in @lxcpath.
- *
- * Returns path on success, NULL on error. The caller must free()
- * the returned path.
- */
-char *lxc_cgroup_path_get(const char *filename, const char *name,
-                          const char *lxcpath)
-{
-	size_t name_len = strlen(filename);
-	char *subsystem = alloca(name_len + 1);
-	char *file_suffix = NULL;
-	char *dot, *cgroup, *result;
-
-	strcpy(subsystem, filename);
-	dot = strchr(subsystem, '.');
-	if (dot) {
-		/* a real file was requested: keep "/<filename>" as the suffix
-		 * and reduce the copy to the bare subsystem name
-		 */
-		*dot = '\0';
-		file_suffix = alloca(name_len + 2);
-		file_suffix[0] = '/';
-		strcpy(file_suffix + 1, filename);
-	}
-
-	cgroup = lxc_cgroup_get_hierarchy_path(subsystem, name, lxcpath);
-	if (!cgroup)
-		return NULL;
-
-	/* file_suffix is still NULL when only a subsystem name was given */
-	result = lxc_cgroup_find_abs_path(subsystem, cgroup, true, file_suffix);
-	free(cgroup);
-	return result;
-}
-
-/*
- * Mount the cgroup hierarchies of a container below @root/sys/fs/cgroup.
- *
- * @root        : root directory of the container
- * @cgroup_info : cgroup information; its data member is a struct cgfs_data
- * @type        : one of the LXC_AUTO_CGROUP_* values in the range
- *                LXC_AUTO_CGROUP_RO .. LXC_AUTO_CGROUP_FULL_MIXED
- *
- * A tmpfs is mounted on @root/sys/fs/cgroup first. For every hierarchy a
- * directory named after its comma-joined subsystems (without any "name="
- * prefix) is created. For the FULL types the whole hierarchy is
- * bind-mounted there, otherwise only the container's own cgroup is. When
- * a hierarchy has more than one subsystem, a symlink per subsystem is
- * added. Unless a read-write type was requested, a read-only remount of
- * the tmpfs is attempted at the end and its result ignored.
- *
- * Returns true on success, false on error with errno preserved.
- */
-static bool cgroupfs_mount_cgroup(const char *root,
-		struct lxc_cgroup_info *cgroup_info, int type)
-{
-	size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
-	char *path = NULL;
-	char **parts = NULL;
-	char *dirname = NULL;
-	char *abs_path = NULL;
-	char *abs_path2 = NULL;
-	struct cgfs_data *cgfs_d;
-	struct cgroup_process_info *info, *base_info;
-	int r, saved_errno = 0;
-
-	init_cg_ops();
-
-	cgfs_d = cgroup_info->data;
-	base_info = cgfs_d->info;
-
-	if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
-		ERROR("could not mount cgroups into container: invalid type specified internally");
-		errno = EINVAL;
-		return false;
-	}
-
-	path = calloc(1, bufsz);
-	if (!path)
-		return false;
-	snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
-	r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
-	if (r < 0) {
-		SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
-		/* leave through the common error path so that path is freed */
-		goto out_error;
-	}
-
-	/* now mount all the hierarchies we care about */
-	for (info = base_info; info; info = info->next) {
-		size_t subsystem_count, i;
-		struct cgroup_mount_point *mp = info->designated_mount_point;
-		if (!mp)
-			mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
-		if (!mp) {
-			SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
-			goto out_error;
-		}
-
-		subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
-		parts = calloc(subsystem_count + 1, sizeof(char *));
-		if (!parts)
-			goto out_error;
-
-		/* parts[] borrows the subsystem strings (skipping any "name="
-		 * prefix), so only the array itself is freed later
-		 */
-		for (i = 0; i < subsystem_count; i++) {
-			if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
-				parts[i] = info->hierarchy->subsystems[i] + 5;
-			else
-				parts[i] = info->hierarchy->subsystems[i];
-		}
-		dirname = lxc_string_join(",", (const char **)parts, false);
-		if (!dirname)
-			goto out_error;
-
-		/* create subsystem directory */
-		abs_path = lxc_append_paths(path, dirname);
-		if (!abs_path)
-			goto out_error;
-		r = mkdir_p(abs_path, 0755);
-		if (r < 0 && errno != EEXIST) {
-			SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
-			goto out_error;
-		}
-
-		abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
-		if (!abs_path2)
-			goto out_error;
-
-		if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
-			/* bind-mount the cgroup entire filesystem there */
-			if (strcmp(mp->mount_prefix, "/") != 0) {
-				/* FIXME: maybe we should just try to remount the entire hierarchy
-				 *        with a regular mount command? may that works? */
-				ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
-				goto out_error;
-			}
-			r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
-			if (r < 0) {
-				SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
-				goto out_error;
-			}
-			/* main cgroup path should be read-only */
-			if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
-				r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
-				if (r < 0) {
-					SYSERROR("error re-mounting %s readonly", abs_path);
-					goto out_error;
-				}
-			}
-			/* own cgroup should be read-write */
-			if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
-				r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
-				if (r < 0) {
-					SYSERROR("error bind-mounting %s onto itself", abs_path2);
-					goto out_error;
-				}
-				r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
-				if (r < 0) {
-					SYSERROR("error re-mounting %s readwrite", abs_path2);
-					goto out_error;
-				}
-			}
-		} else {
-			/* create path for container's cgroup */
-			r = mkdir_p(abs_path2, 0755);
-			if (r < 0 && errno != EEXIST) {
-				SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
-				goto out_error;
-			}
-
-			free(abs_path);
-			abs_path = NULL;
-
-			/* bind-mount container's cgroup to that directory */
-			abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
-			if (!abs_path)
-				goto out_error;
-			r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
-			if (r < 0) {
-				SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
-				goto out_error;
-			}
-			if (type == LXC_AUTO_CGROUP_RO) {
-				r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
-				if (r < 0) {
-					SYSERROR("error re-mounting %s readonly", abs_path2);
-					goto out_error;
-				}
-			}
-		}
-
-		free(abs_path);
-		free(abs_path2);
-		abs_path = NULL;
-		abs_path2 = NULL;
-
-		/* add symlinks for every single subsystem */
-		if (subsystem_count > 1) {
-			for (i = 0; i < subsystem_count; i++) {
-				abs_path = lxc_append_paths(path, parts[i]);
-				if (!abs_path)
-					goto out_error;
-				r = symlink(dirname, abs_path);
-				if (r < 0)
-					WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
-				free(abs_path);
-				abs_path = NULL;
-			}
-		}
-		free(dirname);
-		free(parts);
-		dirname = NULL;
-		parts = NULL;
-	}
-
-	/* try to remount the tmpfs readonly, since the container shouldn't
-	 * change anything (this will also make sure that trying to create
-	 * new cgroups outside the allowed area fails with an error instead
-	 * of simply causing this to create directories in the tmpfs itself)
-	 */
-	if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
-		mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
-
-	free(path);
-
-	return true;
-
-out_error:
-	saved_errno = errno;
-	free(path);
-	free(dirname);
-	free(parts);
-	free(abs_path);
-	free(abs_path2);
-	errno = saved_errno;
-	return false;
-}
-
-/*
- * Count the tasks in the cgroup of the first membership entry recorded in
- * @handler, including all cgroups nested below it.
- *
- * Returns the value of cgroup_recursive_task_count(), or -1 if there is
- * no membership information (errno = ENOENT), no mount point, or the
- * absolute path cannot be built.
- */
-int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-	struct cgroup_process_info *first = data->info;
-	struct cgroup_mount_point *mnt;
-	char *dir;
-	int count;
-
-	if (!first) {
-		errno = ENOENT;
-		return -1;
-	}
-
-	mnt = first->designated_mount_point;
-	if (!mnt) {
-		mnt = lxc_cgroup_find_mount_point(first->hierarchy, first->cgroup_path, false);
-		if (!mnt)
-			return -1;
-	}
-
-	dir = cgroup_to_absolute_path(mnt, first->cgroup_path, NULL);
-	if (!dir)
-		return -1;
-
-	count = cgroup_recursive_task_count(dir);
-	free(dir);
-	return count;
-}
-
-/*
- * Parse a /proc/<pid>/cgroup style file into a list of membership entries.
- *
- * @proc_pid_cgroup_str : path of the file to read
- * @meta                : meta data used to map hierarchy numbers
- *
- * Each line has the form "hierarchy:subsystems:group". Lines that cannot
- * be split or whose hierarchy field is not a plain number are skipped, as
- * are hierarchies that are not marked as used. A hierarchy number that is
- * out of range or has no entry in @meta fails the call with errno set to
- * EAGAIN.
- *
- * Returns the head of the list, or NULL on error with errno preserved.
- * NULL is also returned when no line produced an entry.
- */
-static struct cgroup_process_info *
-lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str,
-			     struct cgroup_meta_data *meta)
-{
-	struct cgroup_process_info *result = NULL;
-	FILE *proc_pid_cgroup = NULL;
-	char *line = NULL;
-	size_t sz = 0;
-	int saved_errno = 0;
-	struct cgroup_process_info **cptr = &result;
-	struct cgroup_process_info *entry = NULL;
-
-	proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
-	if (!proc_pid_cgroup)
-		return NULL;
-
-	while (getline(&line, &sz, proc_pid_cgroup) != -1) {
-		/* file format: hierarchy:subsystems:group */
-		char *colon1;
-		char *colon2;
-		char *endptr;
-		int hierarchy_number;
-		size_t line_len;
-		struct cgroup_hierarchy *h = NULL;
-
-		if (!line[0])
-			continue;
-
-		/* strip the trailing newline, if any */
-		line_len = strlen(line);
-		if (line[line_len - 1] == '\n')
-			line[line_len - 1] = '\0';
-
-		colon1 = strchr(line, ':');
-		if (!colon1)
-			continue;
-		*colon1++ = '\0';
-		colon2 = strchr(colon1, ':');
-		if (!colon2)
-			continue;
-		*colon2++ = '\0';
-
-		endptr = NULL;
-		hierarchy_number = strtoul(line, &endptr, 10);
-		if (!endptr || *endptr)
-			continue;
-
-		/* the value is used as an array index below, so a number that
-		 * ended up negative after conversion must be rejected as well
-		 */
-		if (hierarchy_number < 0 || hierarchy_number > meta->maximum_hierarchy) {
-			/* we encountered a hierarchy we didn't have before,
-			 * so probably somebody remounted some stuff in the
-			 * mean time...
-			 */
-			errno = EAGAIN;
-			goto out_error;
-		}
-
-		h = meta->hierarchies[hierarchy_number];
-		if (!h) {
-			/* we encountered a hierarchy that was thought to be
-			 * dead before, so probably somebody remounted some
-			 * stuff in the mean time...
-			 */
-			errno = EAGAIN;
-			goto out_error;
-		}
-
-		/* we are told that we should ignore this hierarchy */
-		if (!h->used)
-			continue;
-
-		entry = calloc(1, sizeof(struct cgroup_process_info));
-		if (!entry)
-			goto out_error;
-
-		entry->meta_ref = lxc_cgroup_get_meta(meta);
-		entry->hierarchy = h;
-		entry->cgroup_path = strdup(colon2);
-		if (!entry->cgroup_path)
-			goto out_error;
-
-		/* append to the end of the list */
-		*cptr = entry;
-		cptr = &entry->next;
-		entry = NULL;
-	}
-
-	fclose(proc_pid_cgroup);
-	free(line);
-	return result;
-
-out_error:
-	saved_errno = errno;
-	if (proc_pid_cgroup)
-		fclose(proc_pid_cgroup);
-	lxc_cgroup_process_info_free(result);
-	lxc_cgroup_process_info_free(entry);
-	free(line);
-	errno = saved_errno;
-	return NULL;
-}
-
-/*
- * Extract the cgroup subsystems named in a comma-separated mount option
- * string.
- *
- * A token counts as a subsystem if it is contained in @kernel_list or if
- * it starts with "name=" (named hierarchy). Returns a NULL-terminated
- * array of newly allocated strings, or NULL if an allocation fails (errno
- * preserved) or no token qualified.
- */
-static char **subsystems_from_mount_options(const char *mount_options,
-					    char **kernel_list)
-{
-	char **found = NULL;
-	size_t found_capacity = 0;
-	size_t found_count = 0;
-	char *copy, *tok, *state = NULL;
-	int saved_errno;
-
-	/* strtok_r() modifies its input, so work on a private copy */
-	copy = alloca(strlen(mount_options)+1);
-	strcpy(copy, mount_options);
-
-	tok = strtok_r(copy, ",", &state);
-	while (tok) {
-		/* we have a subsystem if it's either in the list of
-		 * subsystems provided by the kernel OR if it starts
-		 * with name= for named hierarchies
-		 */
-		bool named = !strncmp(tok, "name=", 5);
-
-		if (named || lxc_string_in_array(tok, (const char **)kernel_list)) {
-			if (lxc_grow_array((void ***)&found, &found_capacity, found_count + 1, 12) < 0)
-				goto fail;
-			/* terminate first so that the array stays consistent
-			 * even if the strdup() below fails
-			 */
-			found[found_count + 1] = NULL;
-			found[found_count] = strdup(tok);
-			if (!found[found_count])
-				goto fail;
-			found_count++;
-		}
-
-		tok = strtok_r(NULL, ",", &state);
-	}
-
-	return found;
-
-fail:
-	saved_errno = errno;
-	lxc_free_array((void**)found, free);
-	errno = saved_errno;
-	return NULL;
-}
-
-/* Release a mount point structure and the two strings it holds;
- * NULL is accepted and ignored.
- */
-static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
-{
-	if (mp) {
-		free(mp->mount_point);
-		free(mp->mount_prefix);
-		free(mp);
-	}
-}
-
-/* Release a hierarchy structure: its subsystem name array (including the
- * strings), the all_mount_points array itself (but not what its elements
- * point to) and the structure. NULL is accepted and ignored.
- */
-static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
-{
-	if (h) {
-		lxc_free_array((void **)h->subsystems, free);
-		free(h->all_mount_points);
-		free(h);
-	}
-}
-
-/*
- * Check whether @name is acceptable as a single cgroup path component.
- *
- * "." and ".." are rejected, as is any name containing a '/', a space, a
- * control character or a byte outside the printable ASCII range.
- */
-static bool is_valid_cgroup(const char *name)
-{
-	size_t i;
-
-	if (!strcmp(name, ".") || !strcmp(name, ".."))
-		return false;
-
-	for (i = 0; name[i] != '\0'; i++) {
-		char c = name[i];
-
-		/* Use the ASCII printable characters range(32 - 127)
-		 * is reasonable, we kick out 32(SPACE) because it'll
-		 * break legacy lxc-ls
-		 */
-		if (c <= 32 || c >= 127 || c == '/')
-			return false;
-	}
-
-	return true;
-}
-
-/*
- * Create or remove the directory of cgroup @path below mount point @mp.
- *
- * @do_remove : false creates the directory with mkdir(), true removes it
- * @recurse   : only used when removing; selects cgroup_rmdir() over a
- *              plain rmdir()
- *
- * Returns the result of the directory operation, or -1 if the absolute
- * path cannot be built. errno of the directory operation is preserved.
- */
-static int create_or_remove_cgroup(bool do_remove,
-		struct cgroup_mount_point *mp, const char *path, int recurse)
-{
-	char *dir = cgroup_to_absolute_path(mp, path, NULL);
-	int rc, saved_errno = 0;
-
-	if (!dir)
-		return -1;
-
-	if (!do_remove)
-		rc = mkdir(dir, 0777);
-	else if (recurse)
-		rc = cgroup_rmdir(dir);
-	else
-		rc = rmdir(dir);
-
-	/* free() must not disturb the errno the caller will look at */
-	saved_errno = errno;
-	free(dir);
-	errno = saved_errno;
-	return rc;
-}
-
-/* Create the directory for cgroup @path below mount point @mp;
- * forwards to create_or_remove_cgroup() and returns its result.
- */
-static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
-{
-	return create_or_remove_cgroup(false, mp, path, false);
-}
-
-/* Remove the directory of cgroup @path below mount point @mp, passing
- * @recurse on; forwards to create_or_remove_cgroup() and returns its result.
- */
-static int remove_cgroup(struct cgroup_mount_point *mp,
-			 const char *path, bool recurse)
-{
-	return create_or_remove_cgroup(true, mp, path, recurse);
-}
-
-/*
- * Translate cgroup @path into an absolute filesystem path below @mp.
- *
- * The mount point's prefix is stripped from the front of @path, the
- * remainder is appended to mp->mount_point and @suffix (may be NULL) is
- * appended to that.
- *
- * Returns a newly allocated string the caller must free(). NULL is
- * returned with errno = EINVAL if @path is not absolute or does not lie
- * below the prefix, with errno = ENOMEM if the formatted result does not
- * fit, and also when the buffer cannot be allocated.
- */
-static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp,
-				     const char *path, const char *suffix)
-{
-	/* first we have to make sure we subtract the mount point's prefix */
-	const char *strip = mp->mount_prefix;
-	const char *tail = suffix ? suffix : "";
-	size_t strip_len = 0;
-	ssize_t total, written;
-	char *abs;
-
-	/* we want to make sure only absolute paths to cgroups are passed to us */
-	if (path[0] != '/') {
-		errno = EINVAL;
-		return NULL;
-	}
-
-	/* a prefix of "/" leaves nothing to strip */
-	if (strip && strcmp(strip, "/") != 0) {
-		strip_len = strlen(strip);
-
-		/* prefix doesn't match */
-		if (strncmp(strip, path, strip_len) != 0) {
-			errno = EINVAL;
-			return NULL;
-		}
-		/* if prefix is /foo and path is /foobar */
-		if (path[strip_len] != '/' && path[strip_len] != '\0') {
-			errno = EINVAL;
-			return NULL;
-		}
-	}
-
-	/* remove prefix from path */
-	path += strip_len;
-
-	total = strlen(mp->mount_point) + strlen(path) + strlen(tail);
-	abs = calloc(total + 1, 1);
-	if (!abs)
-		return NULL;
-
-	written = snprintf(abs, total + 1, "%s%s%s", mp->mount_point, path, tail);
-	if (written > total) {
-		free(abs);
-		errno = ENOMEM;
-		return NULL;
-	}
-
-	return abs;
-}
-
-/*
- * Find the first entry of list @info whose hierarchy carries @subsystem.
- *
- * Returns that entry, or NULL with errno = ENOENT if there is none.
- */
-static struct cgroup_process_info *
-find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
-{
-	struct cgroup_process_info *cur = info;
-
-	while (cur) {
-		if (lxc_string_in_array(subsystem, (const char **)cur->hierarchy->subsystems))
-			return cur;
-		cur = cur->next;
-	}
-
-	errno = ENOENT;
-	return NULL;
-}
-
-/*
- * Read the file @sub_filename inside directory @cgroup_path into @value,
- * a buffer of @len bytes.
- *
- * Returns the result of lxc_read_from_file() with its errno preserved,
- * or -1 if the file name cannot be assembled.
- */
-static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
-			 char *value, size_t len)
-{
-	const char *components[3] = { cgroup_path, sub_filename, NULL };
-	char *full_name;
-	int rc, saved_errno;
-
-	full_name = lxc_string_join("/", components, false);
-	if (!full_name)
-		return -1;
-
-	rc = lxc_read_from_file(full_name, value, len);
-
-	/* keep the errno of the read across free() */
-	saved_errno = errno;
-	free(full_name);
-	errno = saved_errno;
-
-	return rc;
-}
-
-/*
- * Write the string @value to the file @sub_filename inside directory
- * @cgroup_path.
- *
- * Returns the result of lxc_write_to_file() with its errno preserved,
- * or -1 if the file name cannot be assembled.
- */
-static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
-			 const char *value)
-{
-	const char *components[3] = { cgroup_path, sub_filename, NULL };
-	char *full_name;
-	int rc, saved_errno;
-
-	full_name = lxc_string_join("/", components, false);
-	if (!full_name)
-		return -1;
-
-	rc = lxc_write_to_file(full_name, value, strlen(value), false);
-
-	/* keep the errno of the write across free() */
-	saved_errno = errno;
-	free(full_name);
-	errno = saved_errno;
-
-	return rc;
-}
-
-/*
- * Apply the configured cgroup settings to the container of @h.
- *
- * @cgroup_settings : list whose elements are struct lxc_cgroup
- * @do_devices      : true applies only settings whose name starts with
- *                    "devices", false applies only all the others
- *
- * A "devices.deny" or "devices.allow" entry is skipped when
- * cgroup_devices_has_allow_or_deny() reports that it is not needed.
- *
- * Returns 0 on success (also for an empty list), -1 as soon as one
- * setting cannot be written.
- */
-static int do_setup_cgroup_limits(struct lxc_handler *h,
-			   struct lxc_list *cgroup_settings, bool do_devices)
-{
-	struct lxc_list *it;
-	struct lxc_cgroup *setting;
-
-	if (lxc_list_empty(cgroup_settings))
-		return 0;
-
-	lxc_list_for_each(it, cgroup_settings) {
-		bool is_devices;
-
-		setting = it->elem;
-		is_devices = !strncmp("devices", setting->subsystem, 7);
-
-		if (do_devices == is_devices) {
-			if (!strcmp(setting->subsystem, "devices.deny") &&
-					cgroup_devices_has_allow_or_deny(h, setting->value, false))
-				continue;
-			if (!strcmp(setting->subsystem, "devices.allow") &&
-					cgroup_devices_has_allow_or_deny(h, setting->value, true))
-				continue;
-			if (lxc_cgroup_set_handler(setting->subsystem, setting->value, h)) {
-				ERROR("Error setting %s to %s for %s\n",
-				      setting->subsystem, setting->value, h->name);
-				return -1;
-			}
-		}
-
-		/* NOTE: this is also logged for entries that were not
-		 * selected by do_devices in this pass
-		 */
-		DEBUG("cgroup '%s' set to '%s'", setting->subsystem, setting->value);
-	}
-
-	INFO("cgroup has been setup");
-	return 0;
-}
-
-/*
- * Check the container's devices.list to decide whether a devices.allow or
- * devices.deny setting is redundant.
- *
- * @h         : handler of the container
- * @v         : value of the setting
- * @for_allow : true when checking a devices.allow entry, false for
- *              devices.deny
- *
- * For deny, only the values "a" and "a *:* rwm" are considered; the
- * result is true unless devices.list contains the line "a *:* rwm".
- * For allow, the result is true if devices.list contains "a *:* rwm" or
- * a line equal to @v.
- * false is returned whenever devices.list cannot be located or opened.
- */
-static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h,
-					     char *v, bool for_allow)
-{
-	char *base;
-	char *path;
-	FILE *devices_list;
-	char *line = NULL;
-	size_t sz = 0;
-	bool ret = !for_allow;
-	const char *parts[3] = {
-		NULL,
-		"devices.list",
-		NULL
-	};
-
-	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
-	// not sure they ever do, but they *could*
-	// right now, I'm assuming they do NOT
-	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
-		return false;
-
-	base = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
-	if (!base)
-		return false;
-	parts[0] = base;
-	path = lxc_string_join("/", parts, false);
-	/* the directory string is only needed to build path; release it
-	 * here so that it is not leaked on any of the exits below
-	 */
-	free(base);
-	if (!path)
-		return false;
-
-	devices_list = fopen_cloexec(path, "r");
-	if (!devices_list) {
-		free(path);
-		return false;
-	}
-
-	while (getline(&line, &sz, devices_list) != -1) {
-		size_t len = strlen(line);
-		if (len > 0 && line[len-1] == '\n')
-			line[len-1] = '\0';
-		if (strcmp(line, "a *:* rwm") == 0) {
-			ret = for_allow;
-			goto out;
-		} else if (for_allow && strcmp(line, v) == 0) {
-			ret = true;
-			goto out;
-		}
-	}
-
-out:
-	fclose(devices_list);
-	free(line);
-	free(path);
-	return ret;
-}
-
-/*
- * Count the lines of all files named "tasks" in directory @cgroup_path
- * and, recursively, in every directory below it.
- *
- * Returns the total, 0 if @cgroup_path cannot be opened, or -1 if an
- * allocation or a stat() on an entry fails. Sub-directories and tasks
- * files that report an error themselves simply contribute nothing.
- */
-static int cgroup_recursive_task_count(const char *cgroup_path)
-{
-	struct dirent *entry_buf;
-	struct dirent *entry;
-	ssize_t name_max;
-	int total = 0;
-	DIR *dir;
-
-	/* see man readdir_r(3) */
-	name_max = pathconf(cgroup_path, _PC_NAME_MAX);
-	if (name_max <= 0)
-		name_max = 255;
-	entry_buf = malloc(offsetof(struct dirent, d_name) + name_max + 1);
-	if (!entry_buf)
-		return -1;
-
-	dir = opendir(cgroup_path);
-	if (!dir) {
-		free(entry_buf);
-		return 0;
-	}
-
-	while (readdir_r(dir, entry_buf, &entry) == 0 && entry) {
-		const char *components[3] = { cgroup_path, entry->d_name, NULL };
-		struct stat sb;
-		char *child;
-		int sub;
-
-		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
-			continue;
-
-		child = lxc_string_join("/", components, false);
-		if (!child) {
-			total = -1;
-			break;
-		}
-
-		if (stat(child, &sb) < 0) {
-			free(child);
-			total = -1;
-			break;
-		}
-
-		/* descend into directories, count lines of "tasks" files and
-		 * ignore everything else
-		 */
-		sub = -1;
-		if (S_ISDIR(sb.st_mode))
-			sub = cgroup_recursive_task_count(child);
-		else if (!strcmp(entry->d_name, "tasks"))
-			sub = count_lines(child);
-		if (sub >= 0)
-			total += sub;
-
-		free(child);
-	}
-
-	closedir(dir);
-	free(entry_buf);
-	return total;
-}
-
-/*
- * Return the number of lines getline() can read from file fn,
- * or -1 if the file cannot be opened.
- */
-static int count_lines(const char *fn)
-{
-	FILE *fp = fopen_cloexec(fn, "r");
-	char *buf = NULL;
-	size_t cap = 0;
-	int lines;
-
-	if (fp == NULL)
-		return -1;
-
-	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
-		;
-
-	free(buf);
-	fclose(fp);
-	return lines;
-}
-
-/*
- * Apply per-hierarchy settings to cgroup_path before child cgroups are
- * created below it.
- *
- * - memory hierarchies: try to set memory.use_hierarchy to 1 (best effort,
- *   a failure is only logged).
- * - cpuset hierarchies: make sure cgroup.clone_children is 1. If that file
- *   does not exist, mp->need_cpuset_init is set so cpuset.cpus/cpuset.mems
- *   get initialized after the new cgroup has been created.
- *
- * mp->need_cpuset_init is always reset to false on entry.
- * Returns 0 on success, -1 if the cpuset path could not be built or
- * cgroup.clone_children could not be written (errno from the write is
- * preserved).
- */
-static int handle_cgroup_settings(struct cgroup_mount_point *mp,
-				  char *cgroup_path)
-{
-	int r, saved_errno = 0;
-	char buf[2];
-
-	mp->need_cpuset_init = false;
-
-	/* If this is the memory cgroup, we want to enforce hierarchy.
-	 * But don't fail if for some reason we can't.
-	 */
-	if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
-		char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
-		if (cc_path) {
-			r = lxc_read_from_file(cc_path, buf, 1);
-			if (r < 1 || buf[0] != '1') {
-				r = lxc_write_to_file(cc_path, "1", 1, false);
-				if (r < 0)
-					SYSERROR("failed to set memory.use_hierarchy to 1; continuing");
-			}
-			free(cc_path);
-		}
-	}
-
-	/* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
-	 * the base cgroup, otherwise containers will start with an empty
-	 * cpuset.mems and cpuset.cpus
-	 */
-	if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
-		char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
-		struct stat sb;
-
-		if (!cc_path)
-			return -1;
-		/* cgroup.clone_children is not available when running under
-		 * older kernel versions; in this case, we'll initialize
-		 * cpuset.cpus and cpuset.mems later, after the new cgroup
-		 * was created
-		 */
-		if (stat(cc_path, &sb) != 0 && errno == ENOENT) {
-			mp->need_cpuset_init = true;
-			free(cc_path);
-			return 0;
-		}
-		/* nothing to do if it is already enabled */
-		r = lxc_read_from_file(cc_path, buf, 1);
-		if (r == 1 && buf[0] == '1') {
-			free(cc_path);
-			return 0;
-		}
-		r = lxc_write_to_file(cc_path, "1", 1, false);
-		/* free() may clobber errno; keep the one from the write */
-		saved_errno = errno;
-		free(cc_path);
-		errno = saved_errno;
-		return r < 0 ? -1 : 0;
-	}
-	return 0;
-}
-
-/*
- * Read file fn into buf via lxc_read_from_file() and NUL-terminate it.
- *
- * If the read filled the whole buffer, the last byte is overwritten with
- * the terminator and bufsize is returned, so callers can detect
- * truncation by comparing the result against bufsize.
- * Returns the value from lxc_read_from_file(), or -1 for a zero bufsize
- * with a zero-length read.
- */
-static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
-{
-	int nread = lxc_read_from_file(fn, buf, bufsize);
-
-	if (nread < 0) {
-		SYSERROR("failed to read %s", fn);
-		return nread;
-	}
-
-	/* common case: there is room left for the terminator */
-	if (nread != bufsize) {
-		buf[nread] = '\0';
-		return nread;
-	}
-
-	if (bufsize == 0) {
-		/* Callers don't do this, but regression/sanity check */
-		ERROR("%s: was not expecting 0 bufsize", __func__);
-		return -1;
-	}
-
-	/* obviously this wasn't empty */
-	buf[bufsize-1] = '\0';
-	return nread;
-}
-
-/*
- * Initialize one cpuset file (name, e.g. "/cpuset.cpus") of cgroup 'path'
- * by copying the value of the same file from the parent cgroup.
- *
- * A value that is already non-empty in the child is left untouched.
- * Returns true if the child already had a value or the copy succeeded,
- * false otherwise.
- */
-static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
-				const char *path, const char *name)
-{
-	char value[1024];
-	char *child_fn, *parent_fn = NULL, *parent_cg, *slash;
-	bool ok = false;
-	int nread;
-
-	child_fn = cgroup_to_absolute_path(mp, path, name);
-	if (!child_fn)
-		return false;
-
-	/* don't overwrite a non-empty value in the file */
-	nread = cgroup_read_from_file(child_fn, value, sizeof(value));
-	if (nread < 0)
-		goto out;
-	if (value[0] != '\0' && value[0] != '\n') {
-		ok = true;
-		goto out;
-	}
-
-	/* derive the parent cgroup by cutting off the last path component */
-	parent_cg = strdup(path);
-	if (!parent_cg)
-		goto out;
-	slash = strrchr(parent_cg, '/');
-	if (!slash) {
-		free(parent_cg);
-		goto out;
-	}
-	if (slash == parent_cg)
-		slash++; /* keep the '/' at the start */
-	*slash = '\0';
-
-	/* path to the same name in the parent cgroup */
-	parent_fn = cgroup_to_absolute_path(mp, parent_cg, name);
-	free(parent_cg);
-	if (!parent_fn)
-		goto out;
-
-	/* copy from parent to child cgroup */
-	nread = cgroup_read_from_file(parent_fn, value, sizeof(value));
-	if (nread < 0)
-		goto out;
-	if (nread == sizeof(value)) {
-		/* If anyone actually sees this error, we can address it */
-		ERROR("parent cpuset value too long");
-		goto out;
-	}
-
-	if (lxc_write_to_file(child_fn, value, strlen(value), false) >= 0)
-		ok = true;
-	else
-		SYSERROR("failed writing %s", child_fn);
-
-out:
-	free(parent_fn);
-	free(child_fn);
-	return ok;
-}
-
-/*
- * Initialize cpuset.cpus and cpuset.mems of cgroup 'path' when the mount
- * point is a cpuset hierarchy flagged with need_cpuset_init.
- * Returns true when nothing had to be done or both files were set up.
- */
-static bool init_cpuset_if_needed(struct cgroup_mount_point *mp,
-				  const char *path)
-{
-	const char **subsystems = (const char **)mp->hierarchy->subsystems;
-
-	/* the files we have to handle here are only in cpuset hierarchies */
-	if (!lxc_string_in_array("cpuset", subsystems))
-		return true;
-
-	if (!mp->need_cpuset_init)
-		return true;
-
-	/* cpus first; mems is only attempted when cpus succeeded */
-	if (!do_init_cpuset_file(mp, path, "/cpuset.cpus"))
-		return false;
-	return do_init_cpuset_file(mp, path, "/cpuset.mems");
-}
-
-extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
-			    const char *lxcpath);
-
-/*
- * Freeze or thaw a container and wait until the freezer reports the
- * requested state.
- *
- * @freeze:  non-zero to freeze ("FROZEN"), zero to thaw ("THAWED")
- * @name:    container name; when non-NULL the new state is also sent to
- *           the monitor
- * @lxcpath: lxcpath of the container
- *
- * Polls freezer.state once per second with no timeout.
- * Returns 0 once the state is reached, -1 if freezer.state cannot be
- * written or read.
- */
-int do_unfreeze(int freeze, const char *name, const char *lxcpath)
-{
-	char v[100];
-	const char *state = freeze ? "FROZEN" : "THAWED";
-	size_t len;
-
-	if (lxc_cgroup_set("freezer.state", state, name, lxcpath) < 0) {
-		ERROR("Failed to %s %s:%s", freeze ? "freeze" : "unfreeze",
-		      lxcpath, name);
-		return -1;
-	}
-	while (1) {
-		/* Zero the buffer and keep one byte in reserve so v is always
-		 * NUL-terminated, without relying on the backend to do it.
-		 */
-		memset(v, 0, sizeof(v));
-		if (lxc_cgroup_get("freezer.state", v, sizeof(v) - 1, name, lxcpath) < 0) {
-			ERROR("Failed to get new freezer state for %s:%s", lxcpath, name);
-			return -1;
-		}
-		/* strip a trailing newline; guard against an empty result so
-		 * we never index v[-1]
-		 */
-		len = strlen(v);
-		if (len > 0 && v[len - 1] == '\n')
-			v[len - 1] = '\0';
-		if (strncmp(v, state, strlen(state)) == 0) {
-			if (name)
-				lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
-			return 0;
-		}
-		sleep(1);
-	}
-}
-
-/*
- * Thin wrapper around do_unfreeze() that takes the container name first;
- * returns whatever do_unfreeze() returns.
- */
-int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
-{
-	return do_unfreeze(freeze, name, lxcpath);
-}
-
-/*
- * Read freezer.state of container 'name' in 'lxcpath' and convert it with
- * lxc_str2state(). Returns -1 if the value cannot be read.
- */
-lxc_state_t freezer_state(const char *name, const char *lxcpath)
-{
-	/* Zero-initialized, and one byte is kept in reserve below, so v is
-	 * always NUL-terminated without relying on the backend to do it.
-	 */
-	char v[100] = { 0 };
-	size_t len;
-
-	if (lxc_cgroup_get("freezer.state", v, sizeof(v) - 1, name, lxcpath) < 0)
-		return -1;
-
-	/* strip a trailing newline; an empty result must not index v[-1] */
-	len = strlen(v);
-	if (len > 0 && v[len - 1] == '\n')
-		v[len - 1] = '\0';
-	return lxc_str2state(v);
-}
-
-/*
- * cgroupfs backend: release the backend data attached to the handler.
- * The process info is freed and removed, the meta data reference is
- * dropped, and handler->cgroup_info->data is reset to NULL.
- */
-static void cgfs_destroy(struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-
-	if (data == NULL)
-		return;
-
-	if (data->info != NULL)
-		lxc_cgroup_process_info_free_and_remove(data->info);
-	if (data->meta != NULL)
-		lxc_cgroup_put_meta(data->meta);
-
-	handler->cgroup_info->data = NULL;
-	free(data);
-}
-
-/*
- * cgroupfs backend: allocate the backend data, load the cgroup meta data
- * and attach the result to handler->cgroup_info->data.
- * Returns false on allocation failure or when no meta data can be loaded.
- */
-static inline bool cgfs_init(struct lxc_handler *handler)
-{
-	struct cgfs_data *data;
-
-	data = malloc(sizeof(*data));
-	if (data == NULL)
-		return false;
-
-	data->info = NULL;
-	data->meta = lxc_cgroup_load_meta();
-	if (data->meta == NULL) {
-		ERROR("cgroupfs failed to detect cgroup metadata");
-		free(data);
-		return false;
-	}
-
-	handler->cgroup_info->data = data;
-	return true;
-}
-
-/*
- * cgroupfs backend: create the container's cgroups from the handler's
- * name and cgroup pattern, and remember the resulting process info in
- * the backend data. Returns false if creation fails.
- */
-static inline bool cgfs_create(struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-	struct cgroup_process_info *created;
-
-	created = lxc_cgroupfs_create(handler->name,
-				      handler->cgroup_info->cgroup_pattern,
-				      data->meta, NULL);
-	if (created == NULL)
-		return false;
-
-	data->info = created;
-	return true;
-}
-
-/*
- * cgroupfs backend: move handler->pid into the cgroups recorded in the
- * backend data. Returns true when lxc_cgroupfs_enter() reports 0.
- */
-static inline bool cgfs_enter(struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-
-	return lxc_cgroupfs_enter(data->info, handler->pid, false) == 0;
-}
-
-/*
- * cgroupfs backend: run lxc_cgroup_create_legacy() for the container's
- * process info, name and pid. Logs and returns false on failure.
- */
-static inline bool cgfs_create_legacy(struct lxc_handler *handler)
-{
-	struct cgfs_data *data = handler->cgroup_info->data;
-
-	if (lxc_cgroup_create_legacy(data->info, handler->name, handler->pid) >= 0)
-		return true;
-
-	ERROR("failed to create legacy ns cgroups for '%s'", handler->name);
-	return false;
-}
-
-/*
- * cgroupfs backend: return the container's cgroup path for 'subsystem',
- * as reported by lxc_cgroup_get_hierarchy_path_handler().
- */
-static char *cgfs_get_cgroup(struct lxc_handler *handler, const char *subsystem)
-{
-	return lxc_cgroup_get_hierarchy_path_handler(subsystem, handler);
-}
-
-/*
- * cgroupfs backend: thaw the container by writing "THAWED" to
- * freezer.state in its freezer cgroup.
- * Returns false if the absolute cgroup path cannot be resolved or the
- * write fails.
- */
-static bool cgfs_unfreeze_fromhandler(struct lxc_handler *handler)
-{
-	char *rel_path, *abs_path;
-	bool thawed;
-
-	rel_path = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
-	abs_path = lxc_cgroup_find_abs_path("freezer", rel_path, true, NULL);
-	if (abs_path == NULL)
-		return false;
-
-	thawed = (do_cgroup_set(abs_path, "freezer.state", "THAWED") == 0);
-	free(abs_path);
-	return thawed;
-}
-
-/*
- * cgroupfs backend: apply the cgroup settings from h->conf->cgroup via
- * do_setup_cgroup_limits(); with_devices is passed through unchanged.
- * Returns true when that call returns 0.
- */
-bool cgroupfs_setup_limits(struct lxc_handler *h, bool with_devices)
-{
-	return do_setup_cgroup_limits(h, &h->conf->cgroup, with_devices) == 0;
-}
-
-/*
- * cgroupfs backend: move process 'pid' into the cgroups of the container
- * identified by name and lxcpath.
- * Returns true on success; every failure logs the same error and
- * returns false.
- */
-bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
-{
-	struct cgroup_meta_data *meta;
-	struct cgroup_process_info *info;
-	int rc;
-
-	meta = lxc_cgroup_load_meta();
-	if (!meta)
-		goto fail;
-
-	/* the meta data is only needed to look up the container info */
-	info = lxc_cgroup_get_container_info(name, lxcpath, meta);
-	lxc_cgroup_put_meta(meta);
-	if (!info)
-		goto fail;
-
-	rc = lxc_cgroupfs_enter(info, pid, false);
-	lxc_cgroup_process_info_free(info);
-	if (rc < 0)
-		goto fail;
-
-	return true;
-
-fail:
-	ERROR("could not move attached process %d to cgroup of container", pid);
-	return false;
-}
-
-/*
- * Operations table of the cgroupfs backend. The generic cgroup_*() and
- * lxc_cgroup_*() entry points below dispatch through active_cg_ops,
- * which init_cg_ops() points at this table when cgmanager cannot be
- * contacted.
- */
-static struct cgroup_ops cgfs_ops = {
-	.destroy = cgfs_destroy,
-	.init = cgfs_init,
-	.create = cgfs_create,
-	.enter = cgfs_enter,
-	.create_legacy = cgfs_create_legacy,
-	.get_cgroup = cgfs_get_cgroup,
-	.get = lxc_cgroupfs_get,
-	.set = lxc_cgroupfs_set,
-	.unfreeze_fromhandler = cgfs_unfreeze_fromhandler,
-	.setup_limits = cgroupfs_setup_limits,
-	.name = "cgroupfs",
-	.attach = lxc_cgroupfs_attach,
-	/* no chown op: cgroup_chown() treats a NULL op as success */
-	.chown = NULL,
-	.mount_cgroup = cgroupfs_mount_cgroup,
-};
-/*
- * Select the cgroup backend. When cgmanager use is enabled and it has not
- * been initialized yet, try to initialize it; if that fails, log the
- * error and fall back to the cgroupfs operations.
- */
-static void init_cg_ops(void)
-{
-	if (!use_cgmanager || cgmanager_initialized)
-		return;
-
-	if (lxc_init_cgmanager())
-		return;
-
-	ERROR("Could not contact cgroup manager, falling back to cgroupfs");
-	active_cg_ops = &cgfs_ops;
-}
-
-/*
- * These are the backend-independent cgroup handlers for container
- * start and stop
- */
-
-/*
- * Let the active backend release the cgroup data held by the handler.
- * Nothing happens when the handler has no cgroup_info or no backend is
- * active.
- */
-void cgroup_destroy(struct lxc_handler *handler)
-{
-	if (handler->cgroup_info && active_cg_ops)
-		active_cg_ops->destroy(handler);
-}
-
-/*
- * Allocate a zeroed lxc_cgroup_info for the active cgroup backend,
- * assign it to the handler, choose the cgroup pattern and run the
- * backend's init op.
- *
- * Returns false if the allocation or the backend init fails.
- */
-bool cgroup_init(struct lxc_handler *handler)
-{
-	init_cg_ops();
-
-	handler->cgroup_info = calloc(1, sizeof(struct lxc_cgroup_info));
-	if (handler->cgroup_info == NULL)
-		return false;
-
-	/* Root uses the system-wide cgroup pattern. Everybody else - and
-	 * root too, when the configuration yields no value - just creates
-	 * a cgroup named after the container under the current one.
-	 */
-	if (geteuid() == 0)
-		handler->cgroup_info->cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
-	if (handler->cgroup_info->cgroup_pattern == NULL)
-		handler->cgroup_info->cgroup_pattern = "%n";
-
-	return active_cg_ops->init(handler);
-}
-
-/*
- * Create the container cgroups for all requested controllers by calling
- * the active backend's create op; returns that op's result.
- */
-bool cgroup_create(struct lxc_handler *handler)
-{
-	return active_cg_ops->create(handler);
-}
-
-/*
- * Enter the container init into its new cgroups for all
- * requested controllers by calling the active backend's enter op;
- * returns that op's result.
- */
-bool cgroup_enter(struct lxc_handler *handler)
-{
-	return active_cg_ops->enter(handler);
-}
-
-/*
- * Call the active backend's create_legacy op if it provides one.
- * A backend without that op is treated as success.
- */
-bool cgroup_create_legacy(struct lxc_handler *handler)
-{
-	if (active_cg_ops->create_legacy)
-		return active_cg_ops->create_legacy(handler);
-	return true;
-}
-
-/*
- * Ask the active backend for the container's cgroup of 'subsystem';
- * returns whatever the backend's get_cgroup op returns.
- */
-char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem)
-{
-	return active_cg_ops->get_cgroup(handler, subsystem);
-}
-
-/*
- * Set cgroup file 'filename' to 'value' for container 'name' in
- * 'lxcpath' through the active backend. Runs init_cg_ops() first, so it
- * does not depend on cgroup_init() having been called.
- */
-int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
-{
-	init_cg_ops();
-	return active_cg_ops->set(filename, value, name, lxcpath);
-}
-
-/*
- * Read cgroup file 'filename' of container 'name' in 'lxcpath' into
- * 'value' (capacity 'len') through the active backend. Runs
- * init_cg_ops() first; returns the backend's get op result.
- */
-int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
-{
-	init_cg_ops();
-	return active_cg_ops->get(filename, value, len, name, lxcpath);
-}
-
-/* Thaw the container through the active backend's unfreeze_fromhandler op. */
-bool lxc_unfreeze_fromhandler(struct lxc_handler *handler)
-{
-	return active_cg_ops->unfreeze_fromhandler(handler);
-}
-
-/*
- * Apply the container's cgroup limits through the active backend's
- * setup_limits op; with_devices is passed through unchanged.
- */
-bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
-{
-	return active_cg_ops->setup_limits(handler, with_devices);
-}
-
-/*
- * Call the active backend's chown op if it provides one.
- * A backend without that op (e.g. cgroupfs) is treated as success.
- */
-bool cgroup_chown(struct lxc_handler *handler)
-{
-	if (active_cg_ops->chown)
-		return active_cg_ops->chown(handler);
-	return true;
-}
-
-/*
- * Move process 'pid' into the cgroups of container 'name' in 'lxcpath'
- * through the active backend. Runs init_cg_ops() first.
- */
-bool lxc_cgroup_attach(const char *name, const char *lxcpath, pid_t pid)
-{
-	init_cg_ops();
-	return active_cg_ops->attach(name, lxcpath, pid);
-}
-
-/*
- * Forward root, cgroup_info and type unchanged to the active backend's
- * mount_cgroup op and return its result.
- */
-bool lxc_setup_mount_cgroup(const char *root,
-		struct lxc_cgroup_info *cgroup_info, int type)
-{
-	return active_cg_ops->mount_cgroup(root, cgroup_info, type);
-}
-- 
1.8.5.3



More information about the lxc-devel mailing list