[lxc-devel] [PATCH RFC] Enable use of user namespaces in containers

Stéphane Graber stgraber at ubuntu.com
Thu Dec 6 19:37:15 UTC 2012


On 12/06/2012 10:02 AM, Serge Hallyn wrote:
> The rootfs will need to be chowned to the mapped userids, which can
> be done with the /usr/bin/uidmapshift tool shipped with the nsexec
> package in ppa:serge-hallyn/userns-natty.
> The container config supports new entries of the form:
>  lxc.id_map = U 100000 0 10000
>  lxc.id_map = G 100000 0 10000
> meaning map 'virtual' uids (in the container) 0-10000 to uids
> 100000-110000 on the host, and same for gids.  So long as there are
> mappings specified in the container config, then CLONE_NEWUSER will
> be used when the container is cloned.  This means that container
> setup is no longer done with root privilege on the host, only root
> privilege in the container.  Therefore cgroup setup is moved from the
> init task to the monitor task.
> 
> To use this patchset, you currently need to either use the raring
> kernel at ppa:serge-hallyn/userns-natty, or build your own kernel
> from either git://kernel.ubuntu.com/serge/quantal-userns.git branch
> master-next.dec3.userns or branch userns-always-map-user-v76 at
> git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git
> plus a patch enabling tmpfs mounts in userns.
> 
> You also need to chown the files in the container rootfs into the
> mapped range.  There is a utility at
> https://code.launchpad.net/~serge-hallyn/+junk/nsexec to do this.
> uidmapshift does the chowning, while the container-userns-convert
> script nicely wraps that program.  So simply running
> 
> 	sudo lxc-create -t ubuntu -n r1
> 	sudo container-userns-convert r1 200000
> 
> will create a container which is shifted so uid 0 in the container
> is uid 200000 on the host.
> 
> TODO: when doing setuid(0), need to only do that if 0 is one of the
> ids we map to.  Similarly, we should only skip dropping capabilities
> if 0 is one of the ids we map to.
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>

Nice, the code is much simpler than I expected.

I only read through it fairly briefly and left a few comments, more
questions and cosmetics than problems with the code itself.

I'm also wondering, what's the state of lxc-attach wrt user namespaces?
Does it need any updating too?

> ---
>  src/lxc/conf.c    |  141 +++++++++++++++++++++++++++++++++++++++++++++++++----
>  src/lxc/conf.h    |   26 ++++++++++
>  src/lxc/confile.c |   60 +++++++++++++++++++++++
>  src/lxc/start.c   |   35 +++++++++++++
>  4 files changed, 253 insertions(+), 9 deletions(-)
> 
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 79d96d7..1a619d0 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -1221,7 +1221,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
>  	return 0;
>  }
>  
> -static int setup_cgroup(const char *name, struct lxc_list *cgroups)
> +int setup_cgroup(const char *name, struct lxc_list *cgroups)

Why the change?

>  {
>  	struct lxc_list *iterator;
>  	struct lxc_cgroup *cg;
> @@ -1882,6 +1882,7 @@ struct lxc_conf *lxc_conf_init(void)
>  	lxc_list_init(&new->network);
>  	lxc_list_init(&new->mount_list);
>  	lxc_list_init(&new->caps);
> +	lxc_list_init(&new->id_map);
>  	for (i=0; i<NUM_LXC_HOOKS; i++)
>  		lxc_list_init(&new->hooks[i]);
>  #if HAVE_APPARMOR
> @@ -2256,6 +2257,44 @@ int lxc_assign_network(struct lxc_list *network, pid_t pid)
>  	return 0;
>  }
>  
> +int add_id_mapping(enum idtype idtype, pid_t pid, uid_t host_start, uid_t ns_start, int range)
> +{
> +        char path[PATH_MAX];
> +        int ret;
> +        FILE *f;
> +
> +        ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
> +        if (ret < 0 || ret >= PATH_MAX) {
> +                fprintf(stderr, "%s: path name too long", __func__);
> +                return -E2BIG;
> +        }
> +        f = fopen(path, "w");
> +        if (!f) {
> +                perror("open");
> +                return -EINVAL;
> +        }
> +        ret = fprintf(f, "%d %d %d", ns_start, host_start, range);
> +        if (ret < 0)
> +                perror("write");
> +        fclose(f);
> +        return ret < 0 ? ret : 0;
> +}
> +
> +int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
> +{
> +	struct lxc_list *iterator;
> +	struct id_map *map;
> +	int ret = 0;
> +
> +	lxc_list_for_each(iterator, idmap) {
> +		map = iterator->elem;
> +		ret = add_id_mapping(map->idtype, pid, map->hostid, map->nsid, map->range);
> +		if (ret)
> +			break;
> +	}
> +	return ret;
> +}
> +
>  int lxc_find_gateway_addresses(struct lxc_handler *handler)
>  {
>  	struct lxc_list *network = &handler->conf->network;
> @@ -2364,6 +2403,93 @@ void lxc_delete_tty(struct lxc_tty_info *tty_info)
>  	tty_info->nbtty = 0;
>  }
>  
> +/*
> + * given an id as seen inside the namespace, return the host id it is
> + * mapped to.  if it is not mapped, return the original id.
> + */
> +static int shiftid(struct lxc_conf *c, int uid, enum idtype w)
> +{
> +	struct lxc_list *iterator;
> +	struct id_map *map;
> +	int low, high;
> +
> +	lxc_list_for_each(iterator, &c->id_map) {
> +		map = iterator->elem;
> +		if (map->idtype != w)
> +			continue;
> +
> +		low = map->nsid;
> +		high = map->nsid + map->range;
> +		if (uid < low || uid >= high)
> +			continue;
> +
> +		return uid - low + map->hostid;
> +	}
> +
> +	return uid;
> +}
> +
> +/*
> + * Take a pathname for a file created on the host, and map the uid and gid
> + * into the container if needed.  (Used for ttys)
> + */
> +static int uid_shift_file(char *path, struct lxc_conf *c)
> +{
> +	struct stat statbuf;
> +	int newuid, newgid;
> +
> +	if (stat(path, &statbuf)) {
> +		SYSERROR("stat(%s)", path);
> +		return -1;
> +	}
> +
> +	newuid = shiftid(c, statbuf.st_uid, ID_TYPE_UID);
> +	newgid = shiftid(c, statbuf.st_gid, ID_TYPE_GID);
> +	if (newuid != statbuf.st_uid || newgid != statbuf.st_gid) {
> +		DEBUG("chowning %s from %d:%d to %d:%d\n", path, statbuf.st_uid, statbuf.st_gid, newuid, newgid);
> +		if (chown(path, newuid, newgid)) {
> +			SYSERROR("chown(%s)", path);
> +			return -1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +int uid_shift_ttys(int pid, struct lxc_conf *conf)
> +{
> +	int i, ret;
> +	struct lxc_tty_info *tty_info = &conf->tty_info;
> +	char path[MAXPATHLEN];
> +	char *ttydir = conf->ttydir;
> +
> +	if (!conf->rootfs.path)
> +		return 0;
> +	/* first the console */
> +	ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/console", pid, ttydir ? ttydir : "");
> +	if (ret < 0 || ret >= sizeof(path)) {
> +		ERROR("console path too long\n");
> +		return -1;
> +	}
> +	if (uid_shift_file(path, conf)) {
> +		DEBUG("Failed to chown the console %s.\n", path);
> +		return -1;
> +	}
> +	for (i=0; i< tty_info->nbtty; i++) {
> +		ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/tty%d",
> +			pid, ttydir ? ttydir : "", i + 1);
> +		if (ret < 0 || ret >= sizeof(path)) {
> +			ERROR("pathname too long for ttys");
> +			return -1;
> +		}
> +		if (uid_shift_file(path, conf)) {
> +			DEBUG("Failed to chown pty %s.\n", path);
> +			return -1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
>  {
>  #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
> @@ -2419,11 +2545,6 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
>  		}
>  	}
>  
> -	if (setup_cgroup(name, &lxc_conf->cgroup)) {
> -		ERROR("failed to setup the cgroups for '%s'", name);
> -		return -1;
> -	}
> -
>  	if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
>  		ERROR("failed to setup the console for '%s'", name);
>  		return -1;
> @@ -2467,9 +2588,11 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
>  		return -1;
>  	}
>  
> -	if (setup_caps(&lxc_conf->caps)) {
> -		ERROR("failed to drop capabilities");
> -		return -1;
> +	if (lxc_list_empty(&lxc_conf->id_map)) {
> +		if (setup_caps(&lxc_conf->caps)) {
> +			ERROR("failed to drop capabilities");
> +			return -1;
> +		}
>  	}

Why can't we drop capabilities in a user namespace?


>  	NOTICE("'%s' is setup.", name);
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index 694bce4..97b9274 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -137,6 +137,26 @@ struct lxc_cgroup {
>  	char *value;
>  };
>  
> +enum idtype {
> +	ID_TYPE_UID,
> +	ID_TYPE_GID
> +};
> +
> +/*
> + * id_map is an id map entry.  Form in confile is:
> + * lxc.id_map = U 9800 0 100
> + * lxc.id_map = U 9900 1000 100
> + * lxc.id_map = G 9800 0 100
> + * lxc.id_map = G 9900 1000 100
> + * meaning the container can use uids and gids 0-100 and 1000-1100,
> + * with uid 0 mapping to uid 9800 on the host, and gid 1000 to
> + * gid 9900 on the host.
> + */
> +struct id_map {
> +	enum idtype idtype;
> +	int hostid, nsid, range;
> +};
> +
>  /*
>   * Defines a structure containing a pty information for
>   * virtualizing a tty
> @@ -220,6 +240,7 @@ struct lxc_conf {
>  	int personality;
>  	struct utsname *utsname;
>  	struct lxc_list cgroup;
> +	struct lxc_list id_map;
>  	struct lxc_list network;
>  	struct lxc_list mount_list;
>  	struct lxc_list caps;
> @@ -256,6 +277,7 @@ extern int pin_rootfs(const char *rootfs);
>  extern int lxc_create_network(struct lxc_handler *handler);
>  extern void lxc_delete_network(struct lxc_handler *handler);
>  extern int lxc_assign_network(struct lxc_list *networks, pid_t pid);
> +extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
>  extern int lxc_find_gateway_addresses(struct lxc_handler *handler);
>  
>  extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
> @@ -268,6 +290,10 @@ extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key);
>  extern int lxc_clear_mount_entries(struct lxc_conf *c);
>  extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
>  
> +extern int setup_cgroup(const char *name, struct lxc_list *cgroups);
> +
> +extern int uid_shift_ttys(int pid, struct lxc_conf *conf);
> +
>  /*
>   * Configure the container from inside
>   */
> diff --git a/src/lxc/confile.c b/src/lxc/confile.c
> index a64ae09..1fa6189 100644
> --- a/src/lxc/confile.c
> +++ b/src/lxc/confile.c
> @@ -55,6 +55,7 @@ static int config_ttydir(const char *, const char *, struct lxc_conf *);
>  static int config_aa_profile(const char *, const char *, struct lxc_conf *);
>  #endif
>  static int config_cgroup(const char *, const char *, struct lxc_conf *);
> +static int config_idmap(const char *, const char *, struct lxc_conf *);
>  static int config_loglevel(const char *, const char *, struct lxc_conf *);
>  static int config_logfile(const char *, const char *, struct lxc_conf *);
>  static int config_mount(const char *, const char *, struct lxc_conf *);
> @@ -94,6 +95,7 @@ static struct lxc_config_t config[] = {
>  	{ "lxc.aa_profile",            config_aa_profile          },
>  #endif
>  	{ "lxc.cgroup",               config_cgroup               },
> +	{ "lxc.id_map",               config_idmap                },
>  	{ "lxc.loglevel",             config_loglevel             },
>  	{ "lxc.logfile",              config_logfile              },
>  	{ "lxc.mount",                config_mount                },
> @@ -1021,6 +1023,64 @@ out:
>  	return -1;
>  }
>  
> +static int config_idmap(const char *key, const char *value, struct lxc_conf *lxc_conf)
> +{
> +	char *token = "lxc.id_map";
> +	char *subkey;
> +	struct lxc_list *idmaplist = NULL;
> +	struct id_map *idmap = NULL;
> +	int hostid, nsid, range;
> +	char type;
> +	int ret;
> +
> +	subkey = strstr(key, token);
> +
> +	if (!subkey)
> +		return -1;
> +
> +	if (!strlen(subkey))
> +		return -1;
> +
> +	idmaplist = malloc(sizeof(*idmaplist));
> +	if (!idmaplist)
> +		goto out;
> +
> +	idmap = malloc(sizeof(*idmap));
> +	if (!idmap)
> +		goto out;
> +	memset(idmap, 0, sizeof(*idmap));
> +
> +	idmaplist->elem = idmap;
> +
> +	lxc_list_add_tail(&lxc_conf->id_map, idmaplist);
> +
> +	ret = sscanf(value, "%c %d %d %d", &type, &hostid, &nsid, &range);
> +	if (ret != 4)
> +		goto out;
> +	INFO("read uid map: type %c hostid %d nsid %d range %d", type, hostid, nsid, range);
> +	if (type == 'U')
> +		idmap->idtype = ID_TYPE_UID;
> +	else if (type == 'G')
> +		idmap->idtype = ID_TYPE_GID;
> +	else 
> +		goto out;
> +	idmap->hostid = hostid;
> +	idmap->nsid = nsid;
> +	idmap->range = range;
> +
> +	return 0;
> +
> +out:
> +	if (idmaplist)
> +		free(idmaplist);
> +
> +	if (idmap) {
> +		free(idmap);
> +	}

^ code style isn't really consistent here :)

> +	return -1;
> +}
> +
>  static int config_path_item(const char *key, const char *value,
>  			    struct lxc_conf *lxc_conf, char **conf_item)
>  {
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 3e26b27..8d03b69 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -542,6 +542,22 @@ static int do_start(void *data)
>  	if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
>  		return -1;
>  
> +	/*
> +	 * if we are in a new user namespace, become root there to have
> +	 * privilege over our namespace
> +	 */
> +	if (!lxc_list_empty(&handler->conf->id_map)) {
> +		NOTICE("switching to gid/uid 0 in new user namespace");
> +		if (setgid(0)) {
> +			SYSERROR("setgid");
> +			exit(1);
> +		}
> +		if (setuid(0)) {
> +			SYSERROR("setuid");
> +			exit(1);
> +		}
> +	}
> +
>  	if (handler->conf->need_utmp_watch) {
>  		if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
>  			SYSERROR("failed to remove CAP_SYS_BOOT capability");
> @@ -589,6 +605,10 @@ int lxc_spawn(struct lxc_handler *handler)
>  		return -1;
>  
>  	handler->clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
> +	if (!lxc_list_empty(&handler->conf->id_map)) {
> +		INFO("Cloning a new user namespace");
> +		handler->clone_flags |= CLONE_NEWUSER;
> +	}
>  	if (!lxc_list_empty(&handler->conf->network)) {
>  
>  		handler->clone_flags |= CLONE_NEWNET;
> @@ -650,12 +670,27 @@ int lxc_spawn(struct lxc_handler *handler)
>  		}
>  	}
>  
> +	if (lxc_map_ids(&handler->conf->id_map, handler->pid)) {
> +		ERROR("failed to set up id mapping");
> +		goto out_delete_net;
> +	}
> +
>  	/* Tell the child to continue its initialization and wait for
>  	 * it to exec or return an error
>  	 */
>  	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
>  		return -1;
>  
> +	if (setup_cgroup(name, &handler->conf->cgroup)) {
> +		ERROR("failed to setup the cgroups for '%s'", name);
> +		goto out_delete_net;
> +	}
> +
> +	/* If child is in a fresh user namespace, chown his ptys for
> +	 * him */
> +	if (uid_shift_ttys(handler->pid, handler->conf))
> +		DEBUG("Failed to chown ptys.\n");
> +
>  	if (handler->ops->post_start(handler, handler->data))
>  		goto out_abort;
>  
> 


-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 899 bytes
Desc: OpenPGP digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20121206/38aa3b46/attachment.pgp>


More information about the lxc-devel mailing list