[lxc-devel] [PATCH RFC] Accomodate stricter devices cgroup rules
Serge Hallyn
serge.hallyn at ubuntu.com
Sat Jul 6 00:34:55 UTC 2013
3.10 kernel comes with proper hierarchical enforcement of devices
cgroup. To keep that code somewhat sane, certain things are not
allowed. Switching from default-allow to default-deny and vice versa
are not allowed when there are children cgroups. (This *could* be
simplified in the kernel by checking that all child cgroups are
unpopulated, but that has not yet been done and may be rejected)
The mountcgroup hook causes lxc-start to break with 3.10 kernels, because
you cannot write 'a' to devices.deny once you have a child cgroup. With
this patch, (a) lxcpath is passed to hooks, (b) the cgroup mount hook sets
the container's devices cgroup, and (c) setup_cgroup() during lxc startup
ignores failures to write to devices subsystem if we are already in a
child of the container's new cgroup.
((a) is not really related to this bug, but is definately needed.
The followup work of making the other hooks use the passed-in lxcpath
is still to be done)
Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
hooks/mountcgroups | 22 ++++++++++++++++++++++
src/lxc/cgroup.c | 37 +++++++++++++++++++++++++++++++++++++
src/lxc/cgroup.h | 2 ++
src/lxc/conf.c | 16 +++++++++-------
src/lxc/conf.h | 6 ++++--
src/lxc/lxccontainer.c | 2 +-
src/lxc/start.c | 20 +++++++++++++-------
7 files changed, 88 insertions(+), 17 deletions(-)
diff --git a/hooks/mountcgroups b/hooks/mountcgroups
index 879fa3c..c1cdcd4 100755
--- a/hooks/mountcgroups
+++ b/hooks/mountcgroups
@@ -28,15 +28,37 @@ set -e
c=$1
d=/sys/fs/cgroup
d2=$LXC_ROOTFS_MOUNT/${d}
+# name lxc hook lxcpath
+lxcpath=$4
if [ ! -d "$d" ]; then
exit 0
fi
mount -n -t tmpfs tmpfs ${d2}
+do_devices_setup() {
+ local devdir="$1"
+ local c="$2"
+ local line
+ local w # which (allow or deny)
+ local v # value
+ egrep "^lxc.cgroup.devices.(allow|deny)[ \t]*=" ${lxcpath}/${c}/config | while read line; do
+ w=`echo $line | awk -F. '{ print $4 }' | awk '{ print $1 }'`
+ v=`echo $line | awk -F= '{ print $2 }'`
+ echo "$v" >> "$devdir"/devices.$w
+ done
+}
+
# XXX TODO - we'll need to account for other cgroup groups beside 'lxc',
# i.e. 'build' or 'users/joe'.
for dir in `/bin/ls $d`; do
+ if [ "$dir" = "devices" ]; then
+ devicesdir="${d}/${dir}/lxc/${c}"
+ mkdir -p "$devicesdir"
+ # set the devices cgroup perms now - we can't change from blacklist to
+ # whitelist, or add perms, once we have children.
+ do_devices_setup "$devicesdir" "${c}"
+ fi
mkdir -p "${d}/${dir}/lxc/${c}/${c}.real"
echo 1 > "${d}/${dir}/lxc/${c}/${c}.real/tasks"
mkdir -p ${d2}/${dir}
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index c28e249..38ed514 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -897,3 +897,40 @@ int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
free(dirpath);
return ret;
}
+
+bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
+{
+ char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN];
+ FILE *f;
+ int ret, junk;
+ size_t sz = 0, l1 = strlen(cgpath), l2;
+ char *end = index(subsystem, '.');
+ int len = end ? (end - subsystem) : strlen(subsystem);
+
+ ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid);
+ if (ret < 0 || ret >= MAXPATHLEN)
+ return false;
+ if ((f = fopen(filepath, "r")) == NULL)
+ return false;
+ while (getline(&line, &sz, f) != -1) {
+ // nr:subsystem:path
+ v2[0] = v2[1] = '\0';
+ ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2);
+ if (ret != 3) {
+ fclose(f);
+ return false;
+ }
+ len = end ? end - subsystem : strlen(subsystem);
+ if (strncmp(v1, subsystem, len) != 0)
+ continue;
+ // v2 will start with '/', skip it by using v2+1
+ // we must be in SUBcgroup, so make sure l2 > l1
+ l2 = strlen(v2+1);
+ if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
+ fclose(f);
+ return true;
+ }
+ }
+ fclose(f);
+ return false;
+}
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 747ff5c..c08b2f7 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -22,6 +22,7 @@
*/
#ifndef _cgroup_h
#define _cgroup_h
+#include <stdbool.h>
struct lxc_handler;
extern int lxc_cgroup_destroy(const char *cgpath);
@@ -32,4 +33,5 @@ extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
+extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
#endif
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 37c0cb4..a69c4f8 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -313,7 +313,8 @@ static int run_buffer(char *buffer)
}
static int run_script_argv(const char *name, const char *section,
- const char *script, const char *hook, char **argsin)
+ const char *script, const char *hook, const char *lxcpath,
+ char **argsin)
{
int ret, i;
char *buffer;
@@ -2782,7 +2783,7 @@ int uid_shift_ttys(int pid, struct lxc_conf *conf)
return 0;
}
-int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
+int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath)
{
#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
int mounted;
@@ -2798,7 +2799,7 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
return -1;
}
- if (run_lxc_hooks(name, "pre-mount", lxc_conf, NULL)) {
+ if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
ERROR("failed to run pre-mount hooks for container '%s'.", name);
return -1;
}
@@ -2825,13 +2826,13 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
return -1;
}
- if (run_lxc_hooks(name, "mount", lxc_conf, NULL)) {
+ if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
ERROR("failed to run mount hooks for container '%s'.", name);
return -1;
}
if (lxc_conf->autodev) {
- if (run_lxc_hooks(name, "autodev", lxc_conf, NULL)) {
+ if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
ERROR("failed to run autodev hooks for container '%s'.", name);
return -1;
}
@@ -2902,7 +2903,8 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
return 0;
}
-int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[])
+int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
+ const char *lxcpath, char *argv[])
{
int which = -1;
struct lxc_list *it;
@@ -2926,7 +2928,7 @@ int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *arg
lxc_list_for_each(it, &conf->hooks[which]) {
int ret;
char *hookname = it->elem;
- ret = run_script_argv(name, "lxc", hookname, hook, argv);
+ ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
if (ret)
return ret;
}
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 9b1677e..ed3240d 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -290,7 +290,8 @@ struct lxc_conf {
char *rcfile; // Copy of the top level rcfile we read
};
-int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
+int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
+ const char *lxcpath, char *argv[]);
extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups);
@@ -326,7 +327,8 @@ extern int uid_shift_ttys(int pid, struct lxc_conf *conf);
* Configure the container from inside
*/
-extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf);
+extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf,
+ const char *lxcpath);
extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf);
#endif
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 4e71fb1..56cae97 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -1810,7 +1810,7 @@ static int clone_update_rootfs(struct lxc_container *c, int flags, char **hookar
SYSERROR("failed to set environment variable for rootfs mount");
}
- if (run_lxc_hooks(c->name, "clone", conf, hookargs)) {
+ if (run_lxc_hooks(c->name, "clone", conf, c->get_config_path(c), hookargs)) {
ERROR("Error executing clone hook for %s", c->name);
exit(1);
}
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 8c8af9c..defa87b 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -315,7 +315,7 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char
}
/* End of environment variable setup for hooks */
- if (run_lxc_hooks(name, "pre-start", conf, NULL)) {
+ if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) {
ERROR("failed to run pre-start hooks for container '%s'.", name);
goto out_aborting;
}
@@ -368,7 +368,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
lxc_set_state(name, handler, STOPPING);
lxc_set_state(name, handler, STOPPED);
- if (run_lxc_hooks(name, "post-stop", handler->conf, NULL))
+ if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL))
ERROR("failed to run post-stop hooks for container '%s'.", name);
/* reset mask set by setup_signal_fd */
@@ -519,7 +519,7 @@ static int do_start(void *data)
#endif
/* Setup the container, ip, names, utsname, ... */
- if (lxc_setup(handler->name, handler->conf)) {
+ if (lxc_setup(handler->name, handler->conf, handler->lxcpath)) {
ERROR("failed to setup the container");
goto out_warn_father;
}
@@ -534,7 +534,7 @@ static int do_start(void *data)
if (lxc_seccomp_load(handler->conf) != 0)
goto out_warn_father;
- if (run_lxc_hooks(handler->name, "start", handler->conf, NULL)) {
+ if (run_lxc_hooks(handler->name, "start", handler->conf, handler->lxcpath, NULL)) {
ERROR("failed to run start hooks for container '%s'.", handler->name);
goto out_warn_father;
}
@@ -597,7 +597,7 @@ int save_phys_nics(struct lxc_conf *conf)
return 0;
}
-
+extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
int lxc_spawn(struct lxc_handler *handler)
{
int failed_before_rename = 0;
@@ -703,8 +703,14 @@ int lxc_spawn(struct lxc_handler *handler)
goto out_delete_net;
if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) {
- ERROR("failed to setup the devices cgroup for '%s'", name);
- goto out_delete_net;
+ /* an unfortunate special case: startup hooks may have already
+ * setup the cgroup. If a setting fails, and this is the devices
+ * subsystem, *and* we are already in a subset of the cgroup,
+ * then ignore the failure */
+ if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) {
+ ERROR("failed to setup the devices cgroup for '%s'", name);
+ goto out_delete_net;
+ }
}
/* Tell the child to complete its initialization and wait for
--
1.8.1.2
More information about the lxc-devel
mailing list