[lxc-devel] [PATCH RFC] Enable use of user namespaces in containers

Serge Hallyn serge.hallyn at canonical.com
Thu Dec 6 15:02:37 UTC 2012


The rootfs will need to be chowned to the mapped userids, which can
be done with the /usr/bin/uidmapshift tool shipped with the nsexec
package in ppa:serge-hallyn/userns-natty.
The container config supports new entries of the form:
 lxc.id_map = U 100000 0 10000
 lxc.id_map = G 100000 0 10000
meaning map 'virtual' uids (in the container) 0-9999 to uids
100000-109999 on the host, and same for gids.  So long as there are
mappings specified in the container config, then CLONE_NEWUSER will
be used when the container is cloned.  This means that container
setup is no longer done with root privilege on the host, only root
privilege in the container.  Therefore cgroup setup is moved from the
init task to the monitor task.

To use this patchset, you currently need to either use the raring
kernel at ppa:serge-hallyn/userns-natty, or build your own kernel
from either git://kernel.ubuntu.com/serge/quantal-userns.git branch
master-next.dec3.userns or branch userns-always-map-user-v76 at
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git
plus a patch enabling tmpfs mounts in userns.

You also need to chown the files in the container rootfs into the
mapped range.  There is a utility at
https://code.launchpad.net/~serge-hallyn/+junk/nsexec to do this.
uidmapshift does the chowning, while the container-userns-convert
script nicely wraps that program.  So simply running

	sudo lxc-create -t ubuntu -n r1
	sudo container-userns-convert r1 200000

will create a container which is shifted so uid 0 in the container
is uid 200000 on the host.

TODO: when doing setuid(0), we should only do that if 0 is one of the
ids we map to.  Similarly, we should skip dropping capabilities only
if 0 is one of the ids we map to.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/conf.c    |  141 +++++++++++++++++++++++++++++++++++++++++++++++++----
 src/lxc/conf.h    |   26 ++++++++++
 src/lxc/confile.c |   60 +++++++++++++++++++++++
 src/lxc/start.c   |   35 +++++++++++++
 4 files changed, 253 insertions(+), 9 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 79d96d7..1a619d0 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -1221,7 +1221,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
 	return 0;
 }
 
-static int setup_cgroup(const char *name, struct lxc_list *cgroups)
+int setup_cgroup(const char *name, struct lxc_list *cgroups)
 {
 	struct lxc_list *iterator;
 	struct lxc_cgroup *cg;
@@ -1882,6 +1882,7 @@ struct lxc_conf *lxc_conf_init(void)
 	lxc_list_init(&new->network);
 	lxc_list_init(&new->mount_list);
 	lxc_list_init(&new->caps);
+	lxc_list_init(&new->id_map);
 	for (i=0; i<NUM_LXC_HOOKS; i++)
 		lxc_list_init(&new->hooks[i]);
 #if HAVE_APPARMOR
@@ -2256,6 +2257,44 @@ int lxc_assign_network(struct lxc_list *network, pid_t pid)
 	return 0;
 }
 
+int add_id_mapping(enum idtype idtype, pid_t pid, uid_t host_start, uid_t ns_start, int range)
+{
+        char path[PATH_MAX];
+        int ret;
+        FILE *f;
+
+        ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
+        if (ret < 0 || ret >= PATH_MAX) {
+                fprintf(stderr, "%s: path name too long", __func__);
+                return -E2BIG;
+        }
+        f = fopen(path, "w");
+        if (!f) {
+                perror("open");
+                return -EINVAL;
+        }
+        ret = fprintf(f, "%d %d %d", ns_start, host_start, range);
+        if (ret < 0)
+                perror("write");
+        fclose(f);
+        return ret < 0 ? ret : 0;
+}
+
+int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
+{
+	struct lxc_list *iterator;
+	struct id_map *map;
+	int ret = 0;
+
+	lxc_list_for_each(iterator, idmap) {
+		map = iterator->elem;
+		ret = add_id_mapping(map->idtype, pid, map->hostid, map->nsid, map->range);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
 int lxc_find_gateway_addresses(struct lxc_handler *handler)
 {
 	struct lxc_list *network = &handler->conf->network;
@@ -2364,6 +2403,93 @@ void lxc_delete_tty(struct lxc_tty_info *tty_info)
 	tty_info->nbtty = 0;
 }
 
+/*
+ * given an id as seen inside the namespace, return the host id it maps to.
+ * if it is not mapped, return the id unchanged.
+ */
+static int shiftid(struct lxc_conf *c, int uid, enum idtype w)
+{
+	struct lxc_list *iterator;
+	struct id_map *map;
+	int low, high;
+
+	lxc_list_for_each(iterator, &c->id_map) {
+		map = iterator->elem;
+		if (map->idtype != w)
+			continue;
+
+		low = map->nsid;
+		high = map->nsid + map->range;
+		if (uid < low || uid >= high)
+			continue;
+
+		return uid - low + map->hostid;
+	}
+
+	return uid;
+}
+
+/*
+ * Take a pathname for a file created on the host, and map the uid and gid
+ * into the container if needed.  (Used for ttys)
+ */
+static int uid_shift_file(char *path, struct lxc_conf *c)
+{
+	struct stat statbuf;
+	int newuid, newgid;
+
+	if (stat(path, &statbuf)) {
+		SYSERROR("stat(%s)", path);
+		return -1;
+	}
+
+	newuid = shiftid(c, statbuf.st_uid, ID_TYPE_UID);
+	newgid = shiftid(c, statbuf.st_gid, ID_TYPE_GID);
+	if (newuid != statbuf.st_uid || newgid != statbuf.st_gid) {
+		DEBUG("chowning %s from %d:%d to %d:%d\n", path, statbuf.st_uid, statbuf.st_gid, newuid, newgid);
+		if (chown(path, newuid, newgid)) {
+			SYSERROR("chown(%s)", path);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+int uid_shift_ttys(int pid, struct lxc_conf *conf)
+{
+	int i, ret;
+	struct lxc_tty_info *tty_info = &conf->tty_info;
+	char path[MAXPATHLEN];
+	char *ttydir = conf->ttydir;
+
+	if (!conf->rootfs.path)
+		return 0;
+	/* first the console */
+	ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/console", pid, ttydir ? ttydir : "");
+	if (ret < 0 || ret >= sizeof(path)) {
+		ERROR("console path too long\n");
+		return -1;
+	}
+	if (uid_shift_file(path, conf)) {
+		DEBUG("Failed to chown the console %s.\n", path);
+		return -1;
+	}
+	for (i=0; i< tty_info->nbtty; i++) {
+		ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/tty%d",
+			pid, ttydir ? ttydir : "", i + 1);
+		if (ret < 0 || ret >= sizeof(path)) {
+			ERROR("pathname too long for ttys");
+			return -1;
+		}
+		if (uid_shift_file(path, conf)) {
+			DEBUG("Failed to chown pty %s.\n", path);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
 {
 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
@@ -2419,11 +2545,6 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
 		}
 	}
 
-	if (setup_cgroup(name, &lxc_conf->cgroup)) {
-		ERROR("failed to setup the cgroups for '%s'", name);
-		return -1;
-	}
-
 	if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
 		ERROR("failed to setup the console for '%s'", name);
 		return -1;
@@ -2467,9 +2588,11 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
 		return -1;
 	}
 
-	if (setup_caps(&lxc_conf->caps)) {
-		ERROR("failed to drop capabilities");
-		return -1;
+	if (lxc_list_empty(&lxc_conf->id_map)) {
+		if (setup_caps(&lxc_conf->caps)) {
+			ERROR("failed to drop capabilities");
+			return -1;
+		}
 	}
 
 	NOTICE("'%s' is setup.", name);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 694bce4..97b9274 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -137,6 +137,26 @@ struct lxc_cgroup {
 	char *value;
 };
 
+enum idtype {
+	ID_TYPE_UID,
+	ID_TYPE_GID
+};
+
+/*
+ * id_map is an id map entry.  Form in confile is:
+ * lxc.id_map = U 9800 0 100
+ * lxc.id_map = U 9900 1000 100
+ * lxc.id_map = G 9800 0 100
+ * lxc.id_map = G 9900 1000 100
+ * meaning the container can use uids and gids 0-99 and 1000-1099,
+ * with uid 0 mapping to uid 9800 on the host, and gid 1000 to
+ * gid 9900 on the host.
+ */
+struct id_map {
+	enum idtype idtype;
+	int hostid, nsid, range;
+};
+
 /*
  * Defines a structure containing a pty information for
  * virtualizing a tty
@@ -220,6 +240,7 @@ struct lxc_conf {
 	int personality;
 	struct utsname *utsname;
 	struct lxc_list cgroup;
+	struct lxc_list id_map;
 	struct lxc_list network;
 	struct lxc_list mount_list;
 	struct lxc_list caps;
@@ -256,6 +277,7 @@ extern int pin_rootfs(const char *rootfs);
 extern int lxc_create_network(struct lxc_handler *handler);
 extern void lxc_delete_network(struct lxc_handler *handler);
 extern int lxc_assign_network(struct lxc_list *networks, pid_t pid);
+extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
 extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
 
 extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
@@ -268,6 +290,10 @@ extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key);
 extern int lxc_clear_mount_entries(struct lxc_conf *c);
 extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
 
+extern int setup_cgroup(const char *name, struct lxc_list *cgroups);
+
+extern int uid_shift_ttys(int pid, struct lxc_conf *conf);
+
 /*
  * Configure the container from inside
  */
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index a64ae09..1fa6189 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -55,6 +55,7 @@ static int config_ttydir(const char *, const char *, struct lxc_conf *);
 static int config_aa_profile(const char *, const char *, struct lxc_conf *);
 #endif
 static int config_cgroup(const char *, const char *, struct lxc_conf *);
+static int config_idmap(const char *, const char *, struct lxc_conf *);
 static int config_loglevel(const char *, const char *, struct lxc_conf *);
 static int config_logfile(const char *, const char *, struct lxc_conf *);
 static int config_mount(const char *, const char *, struct lxc_conf *);
@@ -94,6 +95,7 @@ static struct lxc_config_t config[] = {
 	{ "lxc.aa_profile",            config_aa_profile          },
 #endif
 	{ "lxc.cgroup",               config_cgroup               },
+	{ "lxc.id_map",               config_idmap                },
 	{ "lxc.loglevel",             config_loglevel             },
 	{ "lxc.logfile",              config_logfile              },
 	{ "lxc.mount",                config_mount                },
@@ -1021,6 +1023,64 @@ out:
 	return -1;
 }
 
+static int config_idmap(const char *key, const char *value, struct lxc_conf *lxc_conf)
+{
+	char *token = "lxc.id_map";
+	char *subkey;
+	struct lxc_list *idmaplist = NULL;
+	struct id_map *idmap = NULL;
+	int hostid, nsid, range;
+	char type;
+	int ret;
+
+	subkey = strstr(key, token);
+
+	if (!subkey)
+		return -1;
+
+	if (!strlen(subkey))
+		return -1;
+
+	idmaplist = malloc(sizeof(*idmaplist));
+	if (!idmaplist)
+		goto out;
+
+	idmap = malloc(sizeof(*idmap));
+	if (!idmap)
+		goto out;
+	memset(idmap, 0, sizeof(*idmap));
+
+	idmaplist->elem = idmap;
+
+	lxc_list_add_tail(&lxc_conf->id_map, idmaplist);
+
+	ret = sscanf(value, "%c %d %d %d", &type, &hostid, &nsid, &range);
+	if (ret != 4)
+		goto out;
+	INFO("read uid map: type %c hostid %d nsid %d range %d", type, hostid, nsid, range);
+	if (type == 'U')
+		idmap->idtype = ID_TYPE_UID;
+	else if (type == 'G')
+		idmap->idtype = ID_TYPE_GID;
+	else 
+		goto out;
+	idmap->hostid = hostid;
+	idmap->nsid = nsid;
+	idmap->range = range;
+
+	return 0;
+
+out:
+	if (idmaplist)
+		free(idmaplist);
+
+	if (idmap) {
+		free(idmap);
+	}
+
+	return -1;
+}
+
 static int config_path_item(const char *key, const char *value,
 			    struct lxc_conf *lxc_conf, char **conf_item)
 {
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 3e26b27..8d03b69 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -542,6 +542,22 @@ static int do_start(void *data)
 	if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
 		return -1;
 
+	/*
+	 * if we are in a new user namespace, become root there to have
+	 * privilege over our namespace
+	 */
+	if (!lxc_list_empty(&handler->conf->id_map)) {
+		NOTICE("switching to gid/uid 0 in new user namespace");
+		if (setgid(0)) {
+			SYSERROR("setgid");
+			exit(1);
+		}
+		if (setuid(0)) {
+			SYSERROR("setuid");
+			exit(1);
+		}
+	}
+
 	if (handler->conf->need_utmp_watch) {
 		if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
 			SYSERROR("failed to remove CAP_SYS_BOOT capability");
@@ -589,6 +605,10 @@ int lxc_spawn(struct lxc_handler *handler)
 		return -1;
 
 	handler->clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
+	if (!lxc_list_empty(&handler->conf->id_map)) {
+		INFO("Cloning a new user namespace");
+		handler->clone_flags |= CLONE_NEWUSER;
+	}
 	if (!lxc_list_empty(&handler->conf->network)) {
 
 		handler->clone_flags |= CLONE_NEWNET;
@@ -650,12 +670,27 @@ int lxc_spawn(struct lxc_handler *handler)
 		}
 	}
 
+	if (lxc_map_ids(&handler->conf->id_map, handler->pid)) {
+		ERROR("failed to set up id mapping");
+		goto out_delete_net;
+	}
+
 	/* Tell the child to continue its initialization and wait for
 	 * it to exec or return an error
 	 */
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
 		return -1;
 
+	if (setup_cgroup(name, &handler->conf->cgroup)) {
+		ERROR("failed to setup the cgroups for '%s'", name);
+		goto out_delete_net;
+	}
+
+	/* If child is in a fresh user namespace, chown his ptys for
+	 * him */
+	if (uid_shift_ttys(handler->pid, handler->conf))
+		DEBUG("Failed to chown ptys.\n");
+
 	if (handler->ops->post_start(handler, handler->data))
 		goto out_abort;
 
-- 
1.7.10.4





More information about the lxc-devel mailing list