[lxc-devel] [PATCH RFC] Accomodate stricter devices cgroup rules

Serge Hallyn serge.hallyn at ubuntu.com
Sat Jul 6 00:34:55 UTC 2013


3.10 kernel comes with proper hierarchical enforcement of devices
cgroup.  To keep that code somewhat sane, certain things are not
allowed.  Switching from default-allow to default-deny and vice versa
are not allowed when there are children cgroups.  (This *could* be
simplified in the kernel by checking that all child cgroups are
unpopulated, but that has not yet been done and may be rejected)

The mountcgroup hook causes lxc-start to break with 3.10 kernels, because
you cannot write 'a' to devices.deny once you have a child cgroup.  With
this patch, (a) lxcpath is passed to hooks, (b) the cgroup mount hook sets
the container's devices cgroup, and (c) setup_cgroup() during lxc startup
ignores failures to write to devices subsystem if we are already in a
child of the container's new cgroup.

((a) is not really related to this bug, but is definately needed.
The followup work of making the other hooks use the passed-in lxcpath
is still to be done)

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 hooks/mountcgroups     | 22 ++++++++++++++++++++++
 src/lxc/cgroup.c       | 37 +++++++++++++++++++++++++++++++++++++
 src/lxc/cgroup.h       |  2 ++
 src/lxc/conf.c         | 16 +++++++++-------
 src/lxc/conf.h         |  6 ++++--
 src/lxc/lxccontainer.c |  2 +-
 src/lxc/start.c        | 20 +++++++++++++-------
 7 files changed, 88 insertions(+), 17 deletions(-)

diff --git a/hooks/mountcgroups b/hooks/mountcgroups
index 879fa3c..c1cdcd4 100755
--- a/hooks/mountcgroups
+++ b/hooks/mountcgroups
@@ -28,15 +28,37 @@ set -e
 c=$1
 d=/sys/fs/cgroup
 d2=$LXC_ROOTFS_MOUNT/${d}
+# name lxc hook lxcpath
+lxcpath=$4
 if [ ! -d "$d" ]; then
     exit 0
 fi
 
 mount -n -t tmpfs tmpfs ${d2}
 
+do_devices_setup() {
+    local devdir="$1"
+    local c="$2"
+    local line
+    local w  # which (allow or deny)
+    local v  # value
+    egrep "^lxc.cgroup.devices.(allow|deny)[ \t]*=" ${lxcpath}/${c}/config | while read line; do
+        w=`echo $line | awk -F. '{ print $4 }' | awk '{ print $1 }'`
+        v=`echo $line | awk -F= '{ print $2 }'`
+        echo "$v" >> "$devdir"/devices.$w
+    done
+}
+
 # XXX TODO - we'll need to account for other cgroup groups beside 'lxc',
 # i.e. 'build' or 'users/joe'.
 for dir in `/bin/ls $d`; do
+    if [ "$dir" = "devices" ]; then
+        devicesdir="${d}/${dir}/lxc/${c}"
+        mkdir -p "$devicesdir"
+        # set the devices cgroup perms now - we can't change from blacklist to
+        # whitelist, or add perms, once we have children.
+        do_devices_setup "$devicesdir" "${c}"
+    fi
     mkdir -p "${d}/${dir}/lxc/${c}/${c}.real"
     echo 1 > "${d}/${dir}/lxc/${c}/${c}.real/tasks"
     mkdir -p ${d2}/${dir}
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index c28e249..38ed514 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -897,3 +897,40 @@ int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
 	free(dirpath);
 	return ret;
 }
+
+bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
+{
+	char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN];
+	FILE *f;
+	int ret, junk;
+	size_t sz = 0, l1 = strlen(cgpath), l2;
+	char *end = index(subsystem, '.');
+	int len = end ? (end - subsystem) : strlen(subsystem);
+
+	ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid);
+	if (ret < 0 || ret >= MAXPATHLEN)
+		return false;
+	if ((f = fopen(filepath, "r")) == NULL)
+		return false;
+	while (getline(&line, &sz, f) != -1) {
+		// nr:subsystem:path
+		v2[0] = v2[1] = '\0';
+		ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2);
+		if (ret != 3) {
+			fclose(f);
+			return false;
+		}
+		len = end ? end - subsystem : strlen(subsystem);
+		if (strncmp(v1, subsystem, len) != 0)
+			continue;
+		// v2 will start with '/', skip it by using v2+1
+		// we must be in SUBcgroup, so make sure l2 > l1
+		l2 = strlen(v2+1);
+		if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
+			fclose(f);
+			return true;
+		}
+	}
+	fclose(f);
+	return false;
+}
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 747ff5c..c08b2f7 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -22,6 +22,7 @@
  */
 #ifndef _cgroup_h
 #define _cgroup_h
+#include <stdbool.h>
 
 struct lxc_handler;
 extern int lxc_cgroup_destroy(const char *cgpath);
@@ -32,4 +33,5 @@ extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
 extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
 extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
 extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
+extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
 #endif
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 37c0cb4..a69c4f8 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -313,7 +313,8 @@ static int run_buffer(char *buffer)
 }
 
 static int run_script_argv(const char *name, const char *section,
-		      const char *script, const char *hook, char **argsin)
+		      const char *script, const char *hook, const char *lxcpath,
+		      char **argsin)
 {
 	int ret, i;
 	char *buffer;
@@ -2782,7 +2783,7 @@ int uid_shift_ttys(int pid, struct lxc_conf *conf)
 	return 0;
 }
 
-int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
+int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath)
 {
 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
 	int mounted;
@@ -2798,7 +2799,7 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
 		return -1;
 	}
 
-	if (run_lxc_hooks(name, "pre-mount", lxc_conf, NULL)) {
+	if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
 		ERROR("failed to run pre-mount hooks for container '%s'.", name);
 		return -1;
 	}
@@ -2825,13 +2826,13 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
 		return -1;
 	}
 
-	if (run_lxc_hooks(name, "mount", lxc_conf, NULL)) {
+	if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
 		ERROR("failed to run mount hooks for container '%s'.", name);
 		return -1;
 	}
 
 	if (lxc_conf->autodev) {
-		if (run_lxc_hooks(name, "autodev", lxc_conf, NULL)) {
+		if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
 			ERROR("failed to run autodev hooks for container '%s'.", name);
 			return -1;
 		}
@@ -2902,7 +2903,8 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
 	return 0;
 }
 
-int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[])
+int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
+		  const char *lxcpath, char *argv[])
 {
 	int which = -1;
 	struct lxc_list *it;
@@ -2926,7 +2928,7 @@ int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *arg
 	lxc_list_for_each(it, &conf->hooks[which]) {
 		int ret;
 		char *hookname = it->elem;
-		ret = run_script_argv(name, "lxc", hookname, hook, argv);
+		ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
 		if (ret)
 			return ret;
 	}
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 9b1677e..ed3240d 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -290,7 +290,8 @@ struct lxc_conf {
 	char *rcfile;	// Copy of the top level rcfile we read
 };
 
-int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
+int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
+		  const char *lxcpath, char *argv[]);
 
 extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
 extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups);
@@ -326,7 +327,8 @@ extern int uid_shift_ttys(int pid, struct lxc_conf *conf);
  * Configure the container from inside
  */
 
-extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf);
+extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf,
+			const char *lxcpath);
 
 extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf);
 #endif
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 4e71fb1..56cae97 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -1810,7 +1810,7 @@ static int clone_update_rootfs(struct lxc_container *c, int flags, char **hookar
 			SYSERROR("failed to set environment variable for rootfs mount");
 		}
 
-		if (run_lxc_hooks(c->name, "clone", conf, hookargs)) {
+		if (run_lxc_hooks(c->name, "clone", conf, c->get_config_path(c), hookargs)) {
 			ERROR("Error executing clone hook for %s", c->name);
 			exit(1);
 		}
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 8c8af9c..defa87b 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -315,7 +315,7 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char
 	}
 	/* End of environment variable setup for hooks */
 
-	if (run_lxc_hooks(name, "pre-start", conf, NULL)) {
+	if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) {
 		ERROR("failed to run pre-start hooks for container '%s'.", name);
 		goto out_aborting;
 	}
@@ -368,7 +368,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
 	lxc_set_state(name, handler, STOPPING);
 	lxc_set_state(name, handler, STOPPED);
 
-	if (run_lxc_hooks(name, "post-stop", handler->conf, NULL))
+	if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL))
 		ERROR("failed to run post-stop hooks for container '%s'.", name);
 
 	/* reset mask set by setup_signal_fd */
@@ -519,7 +519,7 @@ static int do_start(void *data)
 	#endif
 
 	/* Setup the container, ip, names, utsname, ... */
-	if (lxc_setup(handler->name, handler->conf)) {
+	if (lxc_setup(handler->name, handler->conf, handler->lxcpath)) {
 		ERROR("failed to setup the container");
 		goto out_warn_father;
 	}
@@ -534,7 +534,7 @@ static int do_start(void *data)
 	if (lxc_seccomp_load(handler->conf) != 0)
 		goto out_warn_father;
 
-	if (run_lxc_hooks(handler->name, "start", handler->conf, NULL)) {
+	if (run_lxc_hooks(handler->name, "start", handler->conf, handler->lxcpath, NULL)) {
 		ERROR("failed to run start hooks for container '%s'.", handler->name);
 		goto out_warn_father;
 	}
@@ -597,7 +597,7 @@ int save_phys_nics(struct lxc_conf *conf)
 	return 0;
 }
 
-
+extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
 int lxc_spawn(struct lxc_handler *handler)
 {
 	int failed_before_rename = 0;
@@ -703,8 +703,14 @@ int lxc_spawn(struct lxc_handler *handler)
 		goto out_delete_net;
 
 	if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) {
-		ERROR("failed to setup the devices cgroup for '%s'", name);
-		goto out_delete_net;
+		/* an unfortunate special case: startup hooks may have already
+		 * setup the cgroup.  If a setting fails, and this is the devices
+		 * subsystem, *and* we are already in a subset of the cgroup,
+		 * then ignore the failure */
+		if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) {
+			ERROR("failed to setup the devices cgroup for '%s'", name);
+			goto out_delete_net;
+		}
 	}
 
 	/* Tell the child to complete its initialization and wait for
-- 
1.8.1.2





More information about the lxc-devel mailing list