[lxc-devel] [PATCH stable-1.0] Specially handle block device rootfs

Stéphane Graber stgraber at ubuntu.com
Wed Jun 4 15:29:36 UTC 2014


On Wed, Jun 04, 2014 at 10:16:10AM -0500, Serge Hallyn wrote:
> It is not possible to mount a block device from a non-init user namespace.
> Therefore, if root on the host starts a container with a uid mapping and
> the rootfs is a block device, mount the rootfs before spawning the
> container init task.
> 
> This addresses https://github.com/lxc/lxc/issues/221
> 
> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
> Acked-by: Stéphane Graber <stgraber at ubuntu.com>

Thanks!

Acked-by: Stéphane Graber <stgraber at ubuntu.com>

> 
> Conflicts:
> 	src/lxc/bdev.c
> 	src/lxc/bdev.h
> 	src/lxc/conf.h
> 	src/lxc/start.c
> ---
>  src/lxc/bdev.c  | 39 +++++++++++++++++++++++++++++++++-----
>  src/lxc/bdev.h  |  2 ++
>  src/lxc/conf.c  | 59 +++++++++++++++++++++++++++++++++++++++++----------------
>  src/lxc/conf.h  |  7 +++++++
>  src/lxc/start.c | 16 ++++++++++++++++
>  5 files changed, 102 insertions(+), 21 deletions(-)
> 
> diff --git a/src/lxc/bdev.c b/src/lxc/bdev.c
> index 20e9fb3..0d55c9d 100644
> --- a/src/lxc/bdev.c
> +++ b/src/lxc/bdev.c
> @@ -2454,11 +2454,9 @@ struct bdev *bdev_get(const char *type)
>  	return bdev;
>  }
>  
> -struct bdev *bdev_init(const char *src, const char *dst, const char *mntopts)
> +static const struct bdev_type *bdev_query(const char *src)
>  {
>  	int i;
> -	struct bdev *bdev;
> -
>  	for (i=0; i<numbdevs; i++) {
>  		int r;
>  		r = bdevs[i].ops->detect(src);
> @@ -2468,12 +2466,24 @@ struct bdev *bdev_init(const char *src, const char *dst, const char *mntopts)
>  
>  	if (i == numbdevs)
>  		return NULL;
> +	return &bdevs[i];
> +}
> +
> +struct bdev *bdev_init(const char *src, const char *dst, const char *mntopts)
> +{
> +	struct bdev *bdev;
> +	const struct bdev_type *q;
> +
> +	q = bdev_query(src);
> +	if (!q)
> +		return NULL;
> +
>  	bdev = malloc(sizeof(struct bdev));
>  	if (!bdev)
>  		return NULL;
>  	memset(bdev, 0, sizeof(struct bdev));
> -	bdev->ops = bdevs[i].ops;
> -	bdev->type = bdevs[i].name;
> +	bdev->ops = q->ops;
> +	bdev->type = q->name;
>  	if (mntopts)
>  		bdev->mntopts = strdup(mntopts);
>  	if (src)
> @@ -2794,3 +2804,22 @@ char *overlay_getlower(char *p)
>  		*p1 = '\0';
>  	return p;
>  }
> +
> +bool rootfs_is_blockdev(struct lxc_conf *conf)
> +{
> +	const struct bdev_type *q;
> +	struct stat st;
> +	int ret;
> +
> +	ret = stat(conf->rootfs.path, &st);
> +	if (ret == 0 && S_ISBLK(st.st_mode))
> +		return true;
> +	q = bdev_query(conf->rootfs.path);
> +	if (!q)
> +		return false;
> +	if (strcmp(q->name, "lvm") == 0 ||
> +		strcmp(q->name, "loop") == 0 ||
> +		strcmp(q->name, "nbd") == 0)
> +		return true;
> +	return false;
> +}
> diff --git a/src/lxc/bdev.h b/src/lxc/bdev.h
> index eeb4b1b..3dcb961 100644
> --- a/src/lxc/bdev.h
> +++ b/src/lxc/bdev.h
> @@ -93,6 +93,8 @@ struct bdev *bdev_create(const char *dest, const char *type,
>  			const char *cname, struct bdev_specs *specs);
>  void bdev_put(struct bdev *bdev);
>  
> +bool rootfs_is_blockdev(struct lxc_conf *conf);
> +
>  /* define constants if the kernel/glibc headers don't define them */
>  #ifndef MS_DIRSYNC
>  #define MS_DIRSYNC  128
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 6014e4e..e6067e1 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -3810,15 +3810,26 @@ static void remount_all_slave(void)
>  		free(line);
>  }
>  
> -int lxc_setup(struct lxc_handler *handler)
> +/*
> + * This does the work of remounting / if it is shared, calling the
> + * container pre-mount hooks, and mounting the rootfs.
> + */
> +int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
>  {
> -	const char *name = handler->name;
> -	struct lxc_conf *lxc_conf = handler->conf;
> -	const char *lxcpath = handler->lxcpath;
> -	void *data = handler->data;
> +	if (conf->rootfs_setup) {
> +		/*
> +		 * rootfs was set up in another namespace.  bind-mount it
> +		 * to give us a mount in our own ns so we can pivot_root to it
> +		 */
> +		const char *path = conf->rootfs.mount;
> +		if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
> +			ERROR("Failed to bind-mount container / onto itself");
> +			return false;
> +		}
> +	}
>  
>  	if (detect_ramfs_rootfs()) {
> -		if (chroot_into_slave(lxc_conf)) {
> +		if (chroot_into_slave(conf)) {
>  			ERROR("Failed to chroot into slave /");
>  			return -1;
>  		}
> @@ -3826,6 +3837,32 @@ int lxc_setup(struct lxc_handler *handler)
>  
>  	remount_all_slave();
>  
> +	if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
> +		ERROR("failed to run pre-mount hooks for container '%s'.", name);
> +		return -1;
> +	}
> +
> +	if (setup_rootfs(conf)) {
> +		ERROR("failed to setup rootfs for '%s'", name);
> +		return -1;
> +	}
> +
> +	conf->rootfs_setup = true;
> +	return 0;
> +}
> +
> +int lxc_setup(struct lxc_handler *handler)
> +{
> +	const char *name = handler->name;
> +	struct lxc_conf *lxc_conf = handler->conf;
> +	const char *lxcpath = handler->lxcpath;
> +	void *data = handler->data;
> +
> +	if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
> +		ERROR("Error setting up rootfs mount after spawn");
> +		return -1;
> +	}
> +
>  	if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
>  		if (setup_utsname(lxc_conf->utsname)) {
>  			ERROR("failed to setup the utsname for '%s'", name);
> @@ -3838,16 +3875,6 @@ int lxc_setup(struct lxc_handler *handler)
>  		return -1;
>  	}
>  
> -	if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
> -		ERROR("failed to run pre-mount hooks for container '%s'.", name);
> -		return -1;
> -	}
> -
> -	if (setup_rootfs(lxc_conf)) {
> -		ERROR("failed to setup rootfs for '%s'", name);
> -		return -1;
> -	}
> -
>  	if (lxc_conf->autodev < 0) {
>  		lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
>  	}
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index 5be8b1c..f5fab3d 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -334,6 +334,10 @@ struct lxc_conf {
>  	int start_delay;
>  	int start_order;
>  	struct lxc_list groups;
> +	int nbd_idx;
> +
> +	/* set to true when rootfs has been setup */
> +	bool rootfs_setup;
>  };
>  
>  int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
> @@ -370,6 +374,9 @@ extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
>  extern int lxc_clear_idmaps(struct lxc_conf *c);
>  extern int lxc_clear_groups(struct lxc_conf *c);
>  
> +extern int do_rootfs_setup(struct lxc_conf *conf, const char *name,
> +			   const char *lxcpath);
> +
>  /*
>   * Configure the container from inside
>   */
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index df1304a..e6b4895 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -69,6 +69,7 @@
>  #include "namespace.h"
>  #include "lxcseccomp.h"
>  #include "caps.h"
> +#include "bdev.h"
>  #include "lsm/lsm.h"
>  
>  lxc_log_define(lxc_start, lxc);
> @@ -1054,6 +1055,21 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
>  		handler->conf->need_utmp_watch = 0;
>  	}
>  
> +	if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
> +		/* if the backing store is a device, mount it here and now */
> +		if (rootfs_is_blockdev(conf)) {
> +			if (unshare(CLONE_NEWNS) < 0) {
> +				ERROR("Error unsharing mounts");
> +				goto out_fini_nonet;
> +			}
> +			if (do_rootfs_setup(conf, name, lxcpath) < 0) {
> +				ERROR("Error setting up rootfs mount as root before spawn");
> +				goto out_fini_nonet;
> +			}
> +			INFO("Set up container rootfs as host root");
> +		}
> +	}
> +
>  	err = lxc_spawn(handler);
>  	if (err) {
>  		ERROR("failed to spawn '%s'", name);
> -- 
> 2.0.0
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20140604/535b984b/attachment.sig>


More information about the lxc-devel mailing list