[lxc-devel] [PATCH 1/3] container creation: support unpriv container creation in user namespaces

Michael H. Warfield mhw at WittsEnd.com
Fri Oct 25 04:06:45 UTC 2013


On Wed, 2013-10-23 at 01:02 +0000, Serge Hallyn wrote: 
> From: Serge Hallyn <serge.hallyn at ubuntu.com>

> 1. lxcapi_create: don't try to unshare and mount for dir backed containers

> It's unnecessary, and breaks unprivileged lxc-create (since unpriv users
> cannot yet unshare(CLONE_NEWNS)).

I saw this and thought, "I wonder if this fixes the dangling mount
problem I described in an earlier message."  It has nothing to do with
being an unpriv user, since it was being run as root, but it is right
smack where that problem seems to be.

Just retested with latest from git...  Problem gone.  This fix seems to
have eliminated the dangling mounts on /usr/lib64/lxc/rootfs from
lxc-create.

Regards,
Mike

> 2. api_create: chown rootfs
> 
> chown rootfs to the host uid to which container root will be mapped
> 
> 3. create: run template in a mapped user ns
> 
> 4. use (setuid-root) newxidmap to set id_map if we are not root
> 
> This is needed to be able to set userns mappings as an unprivileged
> user, for unprivileged lxc-start.
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
> ---
>  src/lxc/conf.c         | 102 +++++++++++++++++++++++++-----
>  src/lxc/conf.h         |   4 ++
>  src/lxc/lxccontainer.c | 164 ++++++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 240 insertions(+), 30 deletions(-)
> 
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 208c08b..3f7f0ef 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -2802,31 +2802,49 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
>  	int ret = 0;
>  	enum idtype type;
>  	char *buf = NULL, *pos;
> +	int am_root = (getuid() == 0);
>  
>  	for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
>  		int left, fill;
> -
> -		pos = buf;
> -		lxc_list_for_each(iterator, idmap) {
> -			/* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
> -			if (!buf)
> -				buf = pos = malloc(4096);
> +		int had_entry = 0;
> +		if (!buf) {
> +			buf = pos = malloc(4096);
>  			if (!buf)
>  				return -ENOMEM;
> +		}
> +		pos = buf;
> +		if (!am_root)
> +			pos += sprintf(buf, "new%cidmap %d ",
> +				type == ID_TYPE_UID ? 'u' : 'g',
> +				pid);
>  
> +		lxc_list_for_each(iterator, idmap) {
> +			/* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
>  			map = iterator->elem;
> -			if (map->idtype == type) {
> -				left = 4096 - (pos - buf);
> -				fill = snprintf(pos, left, "%lu %lu %lu\n",
> -					map->nsid, map->hostid, map->range);
> -				if (fill <= 0 || fill >= left)
> -					SYSERROR("snprintf failed, too many mappings");
> -				pos += fill;
> -			}
> +			if (map->idtype != type)
> +				continue;
> +
> +			had_entry = 1;
> +			left = 4096 - (pos - buf);
> +			fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
> +					map->hostid, map->range);
> +			if (fill <= 0 || fill >= left)
> +				SYSERROR("snprintf failed, too many mappings");
> +			pos += fill;
>  		}
> -		if (pos == buf) // no mappings were found
> +		if (!had_entry)
>  			continue;
> -		ret = write_id_mapping(type, pid, buf, pos-buf);
> +		left = 4096 - (pos - buf);
> +		fill = snprintf(pos, left, "\n");
> +		if (fill <= 0 || fill >= left)
> +			SYSERROR("snprintf failed, too many mappings");
> +		pos += fill;
> +
> +		if (am_root)
> +			ret = write_id_mapping(type, pid, buf, pos-buf);
> +		else
> +			ret = system(buf);
> +
>  		if (ret)
>  			break;
>  	}
> @@ -2836,6 +2854,58 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
>  	return ret;
>  }
>  
> +/*
> + * return the host uid to which the container root is mapped, or -1 on
> + * error
> + */
> +int get_mapped_rootid(struct lxc_conf *conf)
> +{
> +	struct lxc_list *it;
> +	struct id_map *map;
> +
> +	lxc_list_for_each(it, &conf->id_map) {
> +		map = it->elem;
> +		if (map->idtype != ID_TYPE_UID)
> +			continue;
> +		if (map->nsid != 0)
> +			continue;
> +		return map->hostid;
> +	}
> +	return -1;
> +}
> +
> +bool hostid_is_mapped(int id, struct lxc_conf *conf)
> +{
> +	struct lxc_list *it;
> +	struct id_map *map;
> +	lxc_list_for_each(it, &conf->id_map) {
> +		map = it->elem;
> +		if (map->idtype != ID_TYPE_UID)
> +			continue;
> +		if (id >= map->hostid && id < map->hostid + map->range)
> +			return true;
> +	}
> +	return false;
> +}
> +
> +int find_unmapped_nsuid(struct lxc_conf *conf)
> +{
> +	struct lxc_list *it;
> +	struct id_map *map;
> +	uid_t freeid = 0;
> +again:
> +	lxc_list_for_each(it, &conf->id_map) {
> +		map = it->elem;
> +		if (map->idtype != ID_TYPE_UID)
> +			continue;
> +		if (freeid >= map->nsid && freeid < map->nsid + map->range) {
> +			freeid = map->nsid + map->range;
> +			goto again;
> +		}
> +	}
> +	return freeid;
> +}
> +
>  int lxc_find_gateway_addresses(struct lxc_handler *handler)
>  {
>  	struct lxc_list *network = &handler->conf->network;
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index 84acce8..445867d 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -361,4 +361,8 @@ extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf,
>  			const char *lxcpath, struct cgroup_process_info *cgroup_info);
>  
>  extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf);
> +
> +extern int get_mapped_rootid(struct lxc_conf *conf);
> +extern int find_unmapped_nsuid(struct lxc_conf *conf);
> +extern bool hostid_is_mapped(int id, struct lxc_conf *conf);
>  #endif
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index c8ecef3..816eb39 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -694,6 +694,49 @@ static const char *lxcapi_get_config_path(struct lxc_container *c);
>  static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v);
>  
>  /*
> + * chown_mapped: for an unprivileged user with uid X to chown a dir
> + * to subuid Y, he needs to run chown as root in a userns where
> + * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
> + * X.  That way, the container root is privileged with respect to
> + * hostuid X, allowing him to do the chown.
> + */
> +static int chown_mapped(int nsrootid, char *path)
> +{
> +	if (nsrootid < 0)
> +		return nsrootid;
> +	pid_t pid = fork();
> +	if (pid < 0) {
> +		SYSERROR("Failed forking");
> +		return -1;
> +	}
> +	if (!pid) {
> +		int hostuid = geteuid(), ret;
> +		char map1[100], map2[100];
> +		char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "--", "chown",
> +				 "0", path, NULL};
> +
> +		// "b:0:nsrootid:1"
> +		ret = snprintf(map1, 100, "b:0:%d:1", nsrootid);
> +		if (ret < 0 || ret >= 100) {
> +			ERROR("Error uid printing map string");
> +			return -1;
> +		}
> +
> +		// "b:hostuid:hostuid:1"
> +		ret = snprintf(map2, 100, "b:%d:%d:1", hostuid, hostuid);
> +		if (ret < 0 || ret >= 100) {
> +			ERROR("Error uid printing map string");
> +			return -1;
> +		}
> +
> +		ret = execvp("lxc-usernsexec", args);
> +		SYSERROR("Failed executing lxc-usernsexec");
> +		exit(1);
> +	}
> +	return wait_for_pid(pid);
> +}
> +
> +/*
>   * do_bdev_create: thin wrapper around bdev_create().  Like bdev_create(),
>   * it returns a mounted bdev on success, NULL on error.
>   */
> @@ -720,6 +763,25 @@ static struct bdev *do_bdev_create(struct lxc_container *c, const char *type,
>  	}
>  
>  	lxcapi_set_config_item(c, "lxc.rootfs", bdev->src);
> +
> +	/* if we are not root, chown the rootfs dir to root in the
> +	 * target uidmap */
> +
> +	if (geteuid() != 0) {
> +		int rootid;
> +		if ((rootid = get_mapped_rootid(c->lxc_conf)) <= 0) {
> +			ERROR("No mapping for container root");
> +			bdev_put(bdev);
> +			return NULL;
> +		}
> +		ret = chown_mapped(rootid, bdev->dest);
> +		if (ret < 0) {
> +			ERROR("Error chowning %s to %d\n", bdev->dest, rootid);
> +			bdev_put(bdev);
> +			return NULL;
> +		}
> +	}
> +
>  	return bdev;
>  }
>  
> @@ -785,6 +847,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
>  		int i;
>  		int ret, len, nargs = 0;
>  		char **newargv;
> +		struct lxc_conf *conf = c->lxc_conf;
>  
>  		process_unlock(); // we're no longer sharing
>  		if (quiet) {
> @@ -795,10 +858,6 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
>  			open("/dev/null", O_RDWR);
>  			open("/dev/null", O_RDWR);
>  		}
> -		if (unshare(CLONE_NEWNS) < 0) {
> -			ERROR("error unsharing mounts");
> -			exit(1);
> -		}
>  
>  		src = c->lxc_conf->rootfs.path;
>  		/*
> @@ -815,9 +874,19 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
>  			exit(1);
>  		}
>  
> -		if (bdev->ops->mount(bdev) < 0) {
> -			ERROR("Error mounting rootfs");
> -			exit(1);
> +		if (strcmp(bdev->type, "dir") != 0) {
> +			if (unshare(CLONE_NEWNS) < 0) {
> +				ERROR("error unsharing mounts");
> +				exit(1);
> +			}
> +			if (bdev->ops->mount(bdev) < 0) {
> +				ERROR("Error mounting rootfs");
> +				exit(1);
> +			}
> +		} else { // TODO come up with a better way here!
> +			if (bdev->dest)
> +				free(bdev->dest);
> +			bdev->dest = strdup(bdev->src);
>  		}
>  
>  		/*
> @@ -827,6 +896,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
>  		if (argv)
>  			for (nargs = 0; argv[nargs]; nargs++) ;
>  		nargs += 4;  // template, path, rootfs and name args
> +
>  		newargv = malloc(nargs * sizeof(*newargv));
>  		if (!newargv)
>  			exit(1);
> @@ -870,8 +940,68 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
>  			exit(1);
>  		newargv[nargs - 1] = NULL;
>  
> +		/*
> +		 * If we're running the template in a mapped userns, then
> +		 * we prepend the template command with:
> +		 * lxc-usernsexec <-m map1> ... <-m mapn> --
> +		 */
> +		if (geteuid() != 0 && !lxc_list_empty(&conf->id_map)) {
> +			int n2args = 1;
> +			char **n2 = malloc(n2args * sizeof(*n2));
> +			struct lxc_list *it;
> +			struct id_map *map;
> +
> +			newargv[0] = tpath;
> +			tpath = "lxc-usernsexec";
> +			n2[0] = "lxc-usernsexec";
> +			lxc_list_for_each(it, &conf->id_map) {
> +				map = it->elem;
> +				n2args += 2;
> +				n2 = realloc(n2, n2args * sizeof(*n2));
> +				if (!n2)
> +					exit(1);
> +				n2[n2args-2] = "-m";
> +				n2[n2args-1] = malloc(200);
> +				if (!n2[n2args-1])
> +					exit(1);
> +				ret = snprintf(n2[n2args-1], 200, "%c:%lu:%lu:%lu",
> +					map->idtype == ID_TYPE_UID ? 'u' : 'g',
> +					map->nsid, map->hostid, map->range);
> +				if (ret < 0 || ret >= 200)
> +					exit(1);
> +			}
> +			bool hostid_mapped = hostid_is_mapped(geteuid(), conf);
> +			int extraargs = hostid_mapped ?  1 : 3;
> +			n2 = realloc(n2, (nargs + n2args + extraargs) * sizeof(*n2));
> +			if (!n2)
> +				exit(1);
> +			if (!hostid_mapped) {
> +				int free_id = find_unmapped_nsuid(conf);
> +				n2[n2args++] = "-m";
> +				if (free_id < 0) {
> +					ERROR("Could not find free uid to map");
> +					exit(1);
> +				}
> +				n2[n2args++] = malloc(200);
> +				if (!n2[n2args-1]) {
> +					SYSERROR("out of memory");
> +					exit(1);
> +				}
> +				ret = snprintf(n2[n2args-1], 200, "u:%d:%d:1",
> +					free_id, geteuid());
> +				if (ret < 0 || ret >= 200) {
> +					ERROR("string too long");
> +					exit(1);
> +				}
> +			}
> +			n2[n2args++] = "--";
> +			for (i = 0; i < nargs; i++)
> +				n2[i + n2args] = newargv[i];
> +			free(newargv);
> +			newargv = n2;
> +		}
>  		/* execute */
> -		execv(tpath, newargv);
> +		execvp(tpath, newargv);
>  		SYSERROR("failed to execute template %s", tpath);
>  		exit(1);
>  	}
> @@ -2100,15 +2230,21 @@ static int clone_update_rootfs(struct lxc_container *c0,
>  		return wait_for_pid(pid);
>  
>  	process_unlock(); // we're no longer sharing
> -	if (unshare(CLONE_NEWNS) < 0) {
> -		ERROR("error unsharing mounts");
> -		exit(1);
> -	}
>  	bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
>  	if (!bdev)
>  		exit(1);
> -	if (bdev->ops->mount(bdev) < 0)
> -		exit(1);
> +	if (strcmp(bdev->type, "dir") != 0) {
> +		if (unshare(CLONE_NEWNS) < 0) {
> +			ERROR("error unsharing mounts");
> +			exit(1);
> +		}
> +		if (bdev->ops->mount(bdev) < 0)
> +			exit(1);
> +	} else { // TODO come up with a better way
> +		if (bdev->dest)
> +			free(bdev->dest);
> +		bdev->dest = strdup(bdev->src);
> +	}
>  
>  	if (!lxc_list_empty(&conf->hooks[LXCHOOK_CLONE])) {
>  		/* Start of environment variable setup for hooks */
> -- 
> 1.8.1.2
> 
> 
> ------------------------------------------------------------------------------
> October Webinars: Code for Performance
> Free Intel webinars can help you accelerate application performance.
> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
> the latest Intel processors and coprocessors. See abstracts and register >
> http://pubads.g.doubleclick.net/gampad/clk?id=60135991&iu=/4140/ostg.clktrk
> _______________________________________________
> Lxc-devel mailing list
> Lxc-devel at lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lxc-devel
> 

-- 
Michael H. Warfield (AI4NB) | (770) 985-6132 |  mhw at WittsEnd.com
   /\/\|=mhw=|\/\/          | (678) 463-0932 |  http://www.wittsend.com/mhw/
   NIC whois: MHW9          | An optimist believes we live in the best of all
 PGP Key: 0x674627FF        | possible worlds.  A pessimist is sure of it!
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 482 bytes
Desc: This is a digitally signed message part
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20131025/76533ca0/attachment.pgp>


More information about the lxc-devel mailing list