[lxc-devel] [PATCH] Support MS_SHARED /

Stéphane Graber stgraber at ubuntu.com
Thu Dec 20 09:49:41 UTC 2012


On 12/20/2012 06:58 AM, Serge Hallyn wrote:
> (I'll be out until Jan 2, but in the meantime, here is hopefully a
> little New Year's gift - this seems to allow lxc-start with / being
> MS_SHARED on the host)
> 
> When / is MS_SHARED (for instance with f18 and modern arch), lxc-start
> fails on pivot_root.  The kernel enforces that, when doing pivot_root,
> the parent of current->fs->root (as well as the new root and the putold
> location) not be MS_SHARED.
> 
> To work around this, check /proc/self/mountinfo for a 'shared:' in
> the '/' line.  If it is there, then create a tiny MS_SLAVE tmpfs dir to
> serve as parent of /, recursively bind mount / into /root under that dir,
> make it rslave, and chroot into it.
> 
> Tested with ubuntu raring image after doing 'mount --make-rshared /'.

Tested here and works as expected.
For distros using AppArmor, you'll likely need a few tweaks to the
profiles to make this work, though.

/proc/mounts in the container will also end up being polluted by all the
mount points from the host. This in itself doesn't cause any big
problem, though the container will try (and fail) to unmount all of those.
Is there anything we can do to improve that situation, or is that a side
effect of MS_SHARED that we can't work around on our end?

I didn't spend much time reviewing the code itself, but it applied to my
local staging tree and built fine, so that's good enough for me :)

> Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>

Acked-by: Stéphane Graber <stgraber at ubuntu.com>

Pushing to staging.

> ---
>  src/lxc/conf.c  | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  src/lxc/conf.h  |   3 ++
>  src/lxc/start.c |   8 ++++
>  3 files changed, 125 insertions(+), 3 deletions(-)
> 
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 65f18c8..96940b3 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -986,8 +986,112 @@ static int setup_autodev(char *root)
>  	return 0;
>  }
>  
> -static int setup_rootfs(const struct lxc_rootfs *rootfs)
> +/*
> + * Detect whether / is mounted MS_SHARED.  The only way I know of to
> + * check that is through /proc/self/mountinfo.
> + * I'm only checking for /.  If the container rootfs or mount location
> + * is MS_SHARED, but not '/', then you're out of luck - figuring that
> + * out would be too much work to be worth it.
> + */
> +#define LINELEN 4096
> +int detect_shared_rootfs(void)
> +{
> +	char buf[LINELEN], *p;
> +	FILE *f;
> +	int i;
> +	char *p2;
> +
> +	f = fopen("/proc/self/mountinfo", "r");
> +	if (!f)
> +		return 0;
> +	while ((p = fgets(buf, LINELEN, f))) {
> +		INFO("looking at .%s.", p);
> +		for (p = buf, i=0; p && i < 4; i++)
> +			p = index(p+1, ' ');
> +		if (!p)
> +			continue;
> +		p2 = index(p+1, ' ');
> +		if (!p2)
> +			continue;
> +		*p2 = '\0';
> +		INFO("now p is .%s.", p);
> +		if (strcmp(p+1, "/") == 0) {
> +			// this is '/'.  is it shared?
> +			p = index(p2+1, ' ');
> +			if (strstr(p, "shared:"))
> +				return 1;
> +		}
> +	}
> +	fclose(f);
> +	return 0;
> +}
> +
> +/*
> + * I'll forgive you for asking whether all of this is needed :)  The
> + * answer is yes.
> + * pivot_root will fail if the new root, the put_old dir, or the parent
> + * of current->fs->root are MS_SHARED.  (parent of current->fs_root may
> + * or may not be current->fs_root - if we assumed it always was, we could
> + * just mount --make-rslave /).  So,
> + *    1. mount a tiny tmpfs to be parent of current->fs->root.
> + *    2. make that MS_SLAVE
> + *    3. make a 'root' directory under that
> + *    4. mount --rbind / under the $tinyroot/root.
> + *    5. make that rslave
> + *    6. chdir and chroot into $tinyroot/root
> + *    7. $tinyroot will be unmounted by our parent in start.c
> + */
> +static int chroot_into_slave(struct lxc_conf *conf)
> +{
> +	char path[MAXPATHLEN];
> +	const char *destpath = conf->rootfs.mount;
> +	int ret;
> +
> +	if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
> +		SYSERROR("failed to mount %s bind", destpath);
> +		return -1;
> +	}
> +	if (mount("", destpath, NULL, MS_SLAVE, 0)) {
> +		SYSERROR("failed to make %s slave", destpath);
> +		return -1;
> +	}
> +	if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
> +		SYSERROR("Failed to mount tmpfs / at %s", destpath);
> +		return -1;
> +	}
> +	ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
> +	if (ret < 0 || ret >= MAXPATHLEN) {
> +		ERROR("out of memory making root path");
> +		return -1;
> +	}
> +	if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
> +		SYSERROR("Failed to create /dev/pts in container");
> +		return -1;
> +	}
> +	if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
> +		SYSERROR("Failed to rbind mount / to %s", path);
> +		return -1;
> +	}
> +	if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
> +		SYSERROR("Failed to make tmp-/ at %s rslave", path);
> +		return -1;
> +	}
> +	if (chdir(path)) {
> +		SYSERROR("Failed to chdir into tmp-/");
> +		return -1;
> +	}
> +	if (chroot(path)) {
> +		SYSERROR("Failed to chroot into tmp-/");
> +		return -1;
> +	}
> +	INFO("Chrooted into tmp-/ at %s\n", path);
> +	return 0;
> +}
> +
> +static int setup_rootfs(struct lxc_conf *conf)
>  {
> +	const struct lxc_rootfs *rootfs = &conf->rootfs;
> +
>  	if (!rootfs->path)
>  		return 0;
>  
> @@ -997,6 +1101,13 @@ static int setup_rootfs(const struct lxc_rootfs *rootfs)
>  		return -1;
>  	}
>  
> +	if (detect_shared_rootfs()) {
> +		if (chroot_into_slave(conf)) {
> +			ERROR("Failed to chroot into slave /");
> +			return -1;
> +		}
> +	}
> +
>  	if (mount_rootfs(rootfs->path, rootfs->mount)) {
>  		ERROR("failed to mount rootfs");
>  		return -1;
> @@ -1225,7 +1336,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
>  	return 0;
>  }
>  
> -static int setup_cgroup(const char *name, struct lxc_list *cgroups)
> +int setup_cgroup(const char *name, struct lxc_list *cgroups)
>  {
>  	struct lxc_list *iterator;
>  	struct lxc_cgroup *cg;
> @@ -2405,7 +2516,7 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
>  		return -1;
>  	}
>  
> -	if (setup_rootfs(&lxc_conf->rootfs)) {
> +	if (setup_rootfs(lxc_conf)) {
>  		ERROR("failed to setup rootfs for '%s'", name);
>  		return -1;
>  	}
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index ca4dbc2..d496916 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -259,6 +259,9 @@ struct lxc_conf {
>  
>  int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf);
>  
> +extern int setup_cgroup(const char *name, struct lxc_list *cgroups);
> +extern int detect_shared_rootfs(void);
> +
>  /*
>   * Initialize the lxc configuration structure
>   */
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 05271fa..82a74d8 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -700,6 +700,14 @@ int lxc_spawn(struct lxc_handler *handler)
>  	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
>  		return -1;
>  
> +	if (detect_shared_rootfs())
> +		umount2(handler->conf->rootfs.mount, MNT_DETACH);
> +
> +	if (setup_cgroup(name, &handler->conf->cgroup)) {
> +		ERROR("failed to setup the cgroups for '%s'", name);
> +		goto out_delete_net;
> +	}
> +
>  	if (handler->ops->post_start(handler, handler->data))
>  		goto out_abort;
>  
> 


-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 899 bytes
Desc: OpenPGP digital signature
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20121220/9787664d/attachment.pgp>


More information about the lxc-devel mailing list