[lxc-devel] [PATCH 1/3] container creation: support unpriv container creation in user namespaces
Stéphane Graber
stgraber at ubuntu.com
Wed Oct 23 23:10:37 UTC 2013
On Wed, Oct 23, 2013 at 01:02:57AM +0000, Serge Hallyn wrote:
> From: Serge Hallyn <serge.hallyn at ubuntu.com>
>
> 1. lxcapi_create: don't try to unshare and mount for dir backed containers
>
> It's unnecessary, and breaks unprivileged lxc-create (since unpriv users
> cannot yet unshare(CLONE_NEWNS)).
>
> 2. api_create: chown rootfs
>
> chown rootfs to the host uid to which container root will be mapped
>
> 3. create: run template in a mapped user ns
>
> 4. use (setuid-root) newuidmap/newgidmap to set id_map if we are not root
>
> This is needed to be able to set userns mappings as an unprivileged
> user, for unprivileged lxc-start.
>
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
Acked-by: Stéphane Graber <stgraber at ubuntu.com>
> ---
> src/lxc/conf.c | 102 +++++++++++++++++++++++++-----
> src/lxc/conf.h | 4 ++
> src/lxc/lxccontainer.c | 164 ++++++++++++++++++++++++++++++++++++++++++++-----
> 3 files changed, 240 insertions(+), 30 deletions(-)
>
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 208c08b..3f7f0ef 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -2802,31 +2802,49 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
> int ret = 0;
> enum idtype type;
> char *buf = NULL, *pos;
> + int am_root = (getuid() == 0);
>
> for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
> int left, fill;
> -
> - pos = buf;
> - lxc_list_for_each(iterator, idmap) {
> - /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
> - if (!buf)
> - buf = pos = malloc(4096);
> + int had_entry = 0;
> + if (!buf) {
> + buf = pos = malloc(4096);
> if (!buf)
> return -ENOMEM;
> + }
> + pos = buf;
> + if (!am_root)
> + pos += sprintf(buf, "new%cidmap %d ",
> + type == ID_TYPE_UID ? 'u' : 'g',
> + pid);
>
> + lxc_list_for_each(iterator, idmap) {
> + /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
> map = iterator->elem;
> - if (map->idtype == type) {
> - left = 4096 - (pos - buf);
> - fill = snprintf(pos, left, "%lu %lu %lu\n",
> - map->nsid, map->hostid, map->range);
> - if (fill <= 0 || fill >= left)
> - SYSERROR("snprintf failed, too many mappings");
> - pos += fill;
> - }
> + if (map->idtype != type)
> + continue;
> +
> + had_entry = 1;
> + left = 4096 - (pos - buf);
> + fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
> + map->hostid, map->range);
> + if (fill <= 0 || fill >= left)
> + SYSERROR("snprintf failed, too many mappings");
> + pos += fill;
> }
> - if (pos == buf) // no mappings were found
> + if (!had_entry)
> continue;
> - ret = write_id_mapping(type, pid, buf, pos-buf);
> + left = 4096 - (pos - buf);
> + fill = snprintf(pos, left, "\n");
> + if (fill <= 0 || fill >= left)
> + SYSERROR("snprintf failed, too many mappings");
> + pos += fill;
> +
> + if (am_root)
> + ret = write_id_mapping(type, pid, buf, pos-buf);
> + else
> + ret = system(buf);
> +
> if (ret)
> break;
> }
> @@ -2836,6 +2854,58 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
> return ret;
> }
>
> +/*
> + * return the host uid to which the container root is mapped, or -1 on
> + * error
> + */
> +int get_mapped_rootid(struct lxc_conf *conf)
> +{
> + struct lxc_list *it;
> + struct id_map *map;
> +
> + lxc_list_for_each(it, &conf->id_map) {
> + map = it->elem;
> + if (map->idtype != ID_TYPE_UID)
> + continue;
> + if (map->nsid != 0)
> + continue;
> + return map->hostid;
> + }
> + return -1;
> +}
> +
> +bool hostid_is_mapped(int id, struct lxc_conf *conf)
> +{
> + struct lxc_list *it;
> + struct id_map *map;
> + lxc_list_for_each(it, &conf->id_map) {
> + map = it->elem;
> + if (map->idtype != ID_TYPE_UID)
> + continue;
> + if (id >= map->hostid && id < map->hostid + map->range)
> + return true;
> + }
> + return false;
> +}
> +
> +int find_unmapped_nsuid(struct lxc_conf *conf)
> +{
> + struct lxc_list *it;
> + struct id_map *map;
> + uid_t freeid = 0;
> +again:
> + lxc_list_for_each(it, &conf->id_map) {
> + map = it->elem;
> + if (map->idtype != ID_TYPE_UID)
> + continue;
> + if (freeid >= map->nsid && freeid < map->nsid + map->range) {
> + freeid = map->nsid + map->range;
> + goto again;
> + }
> + }
> + return freeid;
> +}
> +
> int lxc_find_gateway_addresses(struct lxc_handler *handler)
> {
> struct lxc_list *network = &handler->conf->network;
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index 84acce8..445867d 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -361,4 +361,8 @@ extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf,
> const char *lxcpath, struct cgroup_process_info *cgroup_info);
>
> extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf);
> +
> +extern int get_mapped_rootid(struct lxc_conf *conf);
> +extern int find_unmapped_nsuid(struct lxc_conf *conf);
> +extern bool hostid_is_mapped(int id, struct lxc_conf *conf);
> #endif
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index c8ecef3..816eb39 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -694,6 +694,49 @@ static const char *lxcapi_get_config_path(struct lxc_container *c);
> static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v);
>
> /*
> + * chown_mapped: for an unprivileged user with uid X to chown a dir
> + * to subuid Y, he needs to run chown as root in a userns where
> + * nsid 0 is mapped to hostuid Y, and nsid X is mapped to hostuid
> + * X. That way, the container root is privileged with respect to
> + * hostuid X, allowing him to do the chown.
> + */
> +static int chown_mapped(int nsrootid, char *path)
> +{
> + if (nsrootid < 0)
> + return nsrootid;
> + pid_t pid = fork();
> + if (pid < 0) {
> + SYSERROR("Failed forking");
> + return -1;
> + }
> + if (!pid) {
> + int hostuid = geteuid(), ret;
> + char map1[100], map2[100];
> + char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "--", "chown",
> + "0", path, NULL};
> +
> + // "b:0:nsrootid:1"
> + ret = snprintf(map1, 100, "b:0:%d:1", nsrootid);
> + if (ret < 0 || ret >= 100) {
> + ERROR("Error uid printing map string");
> + return -1;
> + }
> +
> + // "b:hostuid:hostuid:1"
> + ret = snprintf(map2, 100, "b:%d:%d:1", hostuid, hostuid);
> + if (ret < 0 || ret >= 100) {
> + ERROR("Error uid printing map string");
> + return -1;
> + }
> +
> + ret = execvp("lxc-usernsexec", args);
> + SYSERROR("Failed executing lxc-usernsexec");
> + exit(1);
> + }
> + return wait_for_pid(pid);
> +}
> +
> +/*
> * do_bdev_create: thin wrapper around bdev_create(). Like bdev_create(),
> * it returns a mounted bdev on success, NULL on error.
> */
> @@ -720,6 +763,25 @@ static struct bdev *do_bdev_create(struct lxc_container *c, const char *type,
> }
>
> lxcapi_set_config_item(c, "lxc.rootfs", bdev->src);
> +
> + /* if we are not root, chown the rootfs dir to root in the
> + * target uidmap */
> +
> + if (geteuid() != 0) {
> + int rootid;
> + if ((rootid = get_mapped_rootid(c->lxc_conf)) <= 0) {
> + ERROR("No mapping for container root");
> + bdev_put(bdev);
> + return NULL;
> + }
> + ret = chown_mapped(rootid, bdev->dest);
> + if (ret < 0) {
> + ERROR("Error chowning %s to %d\n", bdev->dest, rootid);
> + bdev_put(bdev);
> + return NULL;
> + }
> + }
> +
> return bdev;
> }
>
> @@ -785,6 +847,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
> int i;
> int ret, len, nargs = 0;
> char **newargv;
> + struct lxc_conf *conf = c->lxc_conf;
>
> process_unlock(); // we're no longer sharing
> if (quiet) {
> @@ -795,10 +858,6 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
> open("/dev/null", O_RDWR);
> open("/dev/null", O_RDWR);
> }
> - if (unshare(CLONE_NEWNS) < 0) {
> - ERROR("error unsharing mounts");
> - exit(1);
> - }
>
> src = c->lxc_conf->rootfs.path;
> /*
> @@ -815,9 +874,19 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
> exit(1);
> }
>
> - if (bdev->ops->mount(bdev) < 0) {
> - ERROR("Error mounting rootfs");
> - exit(1);
> + if (strcmp(bdev->type, "dir") != 0) {
> + if (unshare(CLONE_NEWNS) < 0) {
> + ERROR("error unsharing mounts");
> + exit(1);
> + }
> + if (bdev->ops->mount(bdev) < 0) {
> + ERROR("Error mounting rootfs");
> + exit(1);
> + }
> + } else { // TODO come up with a better way here!
> + if (bdev->dest)
> + free(bdev->dest);
> + bdev->dest = strdup(bdev->src);
> }
>
> /*
> @@ -827,6 +896,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
> if (argv)
> for (nargs = 0; argv[nargs]; nargs++) ;
> nargs += 4; // template, path, rootfs and name args
> +
> newargv = malloc(nargs * sizeof(*newargv));
> if (!newargv)
> exit(1);
> @@ -870,8 +940,68 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
> exit(1);
> newargv[nargs - 1] = NULL;
>
> + /*
> + * If we're running the template in a mapped userns, then
> + * we prepend the template command with:
> + * lxc-usernsexec <-m map1> ... <-m mapn> --
> + */
> + if (geteuid() != 0 && !lxc_list_empty(&conf->id_map)) {
> + int n2args = 1;
> + char **n2 = malloc(n2args * sizeof(*n2));
> + struct lxc_list *it;
> + struct id_map *map;
> +
> + newargv[0] = tpath;
> + tpath = "lxc-usernsexec";
> + n2[0] = "lxc-usernsexec";
> + lxc_list_for_each(it, &conf->id_map) {
> + map = it->elem;
> + n2args += 2;
> + n2 = realloc(n2, n2args * sizeof(*n2));
> + if (!n2)
> + exit(1);
> + n2[n2args-2] = "-m";
> + n2[n2args-1] = malloc(200);
> + if (!n2[n2args-1])
> + exit(1);
> + ret = snprintf(n2[n2args-1], 200, "%c:%lu:%lu:%lu",
> + map->idtype == ID_TYPE_UID ? 'u' : 'g',
> + map->nsid, map->hostid, map->range);
> + if (ret < 0 || ret >= 200)
> + exit(1);
> + }
> + bool hostid_mapped = hostid_is_mapped(geteuid(), conf);
> + int extraargs = hostid_mapped ? 1 : 3;
> + n2 = realloc(n2, (nargs + n2args + extraargs) * sizeof(*n2));
> + if (!n2)
> + exit(1);
> + if (!hostid_mapped) {
> + int free_id = find_unmapped_nsuid(conf);
> + n2[n2args++] = "-m";
> + if (free_id < 0) {
> + ERROR("Could not find free uid to map");
> + exit(1);
> + }
> + n2[n2args++] = malloc(200);
> + if (!n2[n2args-1]) {
> + SYSERROR("out of memory");
> + exit(1);
> + }
> + ret = snprintf(n2[n2args-1], 200, "u:%d:%d:1",
> + free_id, geteuid());
> + if (ret < 0 || ret >= 200) {
> + ERROR("string too long");
> + exit(1);
> + }
> + }
> + n2[n2args++] = "--";
> + for (i = 0; i < nargs; i++)
> + n2[i + n2args] = newargv[i];
> + free(newargv);
> + newargv = n2;
> + }
> /* execute */
> - execv(tpath, newargv);
> + execvp(tpath, newargv);
> SYSERROR("failed to execute template %s", tpath);
> exit(1);
> }
> @@ -2100,15 +2230,21 @@ static int clone_update_rootfs(struct lxc_container *c0,
> return wait_for_pid(pid);
>
> process_unlock(); // we're no longer sharing
> - if (unshare(CLONE_NEWNS) < 0) {
> - ERROR("error unsharing mounts");
> - exit(1);
> - }
> bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
> if (!bdev)
> exit(1);
> - if (bdev->ops->mount(bdev) < 0)
> - exit(1);
> + if (strcmp(bdev->type, "dir") != 0) {
> + if (unshare(CLONE_NEWNS) < 0) {
> + ERROR("error unsharing mounts");
> + exit(1);
> + }
> + if (bdev->ops->mount(bdev) < 0)
> + exit(1);
> + } else { // TODO come up with a better way
> + if (bdev->dest)
> + free(bdev->dest);
> + bdev->dest = strdup(bdev->src);
> + }
>
> if (!lxc_list_empty(&conf->hooks[LXCHOOK_CLONE])) {
> /* Start of environment variable setup for hooks */
> --
> 1.8.1.2
>
>
> ------------------------------------------------------------------------------
> October Webinars: Code for Performance
> Free Intel webinars can help you accelerate application performance.
> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from
> the latest Intel processors and coprocessors. See abstracts and register >
> http://pubads.g.doubleclick.net/gampad/clk?id=60135991&iu=/4140/ostg.clktrk
> _______________________________________________
> Lxc-devel mailing list
> Lxc-devel at lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lxc-devel
--
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20131023/f9eeb6a2/attachment.pgp>
More information about the lxc-devel mailing list