[lxc-devel] [PATCH] [RFC] lxc: don't call pivot_root if / is on a ramfs

Serge Hallyn serge.hallyn at ubuntu.com
Wed Oct 8 16:04:55 UTC 2014


Quoting Andrey Wagin (avagin at gmail.com):
> 2014-10-08 18:44 GMT+04:00 Serge Hallyn <serge.hallyn at ubuntu.com>:
> > Quoting Andrew Vagin (avagin at gmail.com):
> >> Here is an updated patch.
> >>
> >> Now ct->rootfs is bind-mounted into /, so we don't need to "bind-mount
> >> the root onto itself first".
> >
> > Hm, interesting.  I thought MS_MOVE was central to the approach.
> 
> mount --rbind ct_rootfs ct_rootfs
> mount --move ct_rootfs /
> 
> I think these two commands are equivalent to mount --rbind ct_rootfs /
> 
> I have read the code once again. rootfs->mount is always a mount
> point, because it used with pivot_root. In this case we don't need to
> bind-mount it into itselt or into root. But we should not foget about
> userns, where this mount is locked and can't be moved to somewhere.
> 
> Please, look at the attached patch. I guess it's the final version.

Thanks, I'll go with this.

Acked-by: Serge E. Hallyn <serge.hallyn at ubuntu.com>

> It remount all useless mount as private and contains a comment why we
> bind-mount the root instead of moving it.
> 
> Thanks.
> 
> >
> >> What is about "do the turn-into-slave first"? I don't know.
> >> remount_all_slave() is called from do_rootfs_setup(). Is it enough?
> >
> > Probably not, though one would *think* that any machine which has / on
> > ramfs doesn't have / ms_shared.  Anyway I was thinking
> > remount_all_slave() should be called before your new nf.
> lxc_setup
>   do_rootfs_setup()
>     remount_all_slave()
>   ...
>   setup_pivot_root
>     prepare_ramfs_root()

> From 49f365260cff8092f1343533868adabfbbd67580 Mon Sep 17 00:00:00 2001
> From: Andrey Vagin <avagin at gmail.com>
> Date: Sun, 5 Oct 2014 01:49:16 +0400
> Subject: [PATCH] [RFC] lxc: don't call pivot_root if / is on a ramfs
> 
> pivot_root can't be called if / is on a ramfs. Currently chroot is
> called before pivot_root. In this case the standard well-known
> 'chroot escape' technique allows to escape a container.
> 
> I think the best way to handle this situation is to make following actions:
> * clean all mounts, which should not be visible in CT
> * move CT's rootfs into /
> * make chroot into /
> 
> I don't have a host, where / is on a ramfs, so I can't test this patch.
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  src/lxc/conf.c | 166 +++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 95 insertions(+), 71 deletions(-)
> 
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 2d7ced9..3662503 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -1449,68 +1449,6 @@ int lxc_delete_autodev(struct lxc_handler *handler)
>  	return 0;
>  }
>  
> -/*
> - * I'll forgive you for asking whether all of this is needed :)  The
> - * answer is yes.
> - * pivot_root will fail if the new root, the put_old dir, or the parent
> - * of current->fs->root are MS_SHARED.  (parent of current->fs_root may
> - * or may not be current->fs_root - if we assumed it always was, we could
> - * just mount --make-rslave /).  So,
> - *    1. mount a tiny tmpfs to be parent of current->fs->root.
> - *    2. make that MS_SLAVE
> - *    3. make a 'root' directory under that
> - *    4. mount --rbind / under the $tinyroot/root.
> - *    5. make that rslave
> - *    6. chdir and chroot into $tinyroot/root
> - *    7. $tinyroot will be unmounted by our parent in start.c
> - */
> -static int chroot_into_slave(struct lxc_conf *conf)
> -{
> -	char path[MAXPATHLEN];
> -	const char *destpath = conf->rootfs.mount;
> -	int ret;
> -
> -	if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
> -		SYSERROR("failed to mount %s bind", destpath);
> -		return -1;
> -	}
> -	if (mount("", destpath, NULL, MS_SLAVE, 0)) {
> -		SYSERROR("failed to make %s slave", destpath);
> -		return -1;
> -	}
> -	if (mount("none", destpath, "tmpfs", 0, "size=10000,mode=755")) {
> -		SYSERROR("Failed to mount tmpfs / at %s", destpath);
> -		return -1;
> -	}
> -	ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
> -	if (ret < 0 || ret >= MAXPATHLEN) {
> -		ERROR("out of memory making root path");
> -		return -1;
> -	}
> -	if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
> -		SYSERROR("Failed to create /dev/pts in container");
> -		return -1;
> -	}
> -	if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
> -		SYSERROR("Failed to rbind mount / to %s", path);
> -		return -1;
> -	}
> -	if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
> -		SYSERROR("Failed to make tmp-/ at %s rslave", path);
> -		return -1;
> -	}
> -	if (chroot(path)) {
> -		SYSERROR("Failed to chroot into tmp-/");
> -		return -1;
> -	}
> -	if (chdir("/")) {
> -		SYSERROR("Failed to chdir into tmp-/");
> -		return -1;
> -	}
> -	INFO("Chrooted into tmp-/ at %s", path);
> -	return 0;
> -}
> -
>  static int setup_rootfs(struct lxc_conf *conf)
>  {
>  	const struct lxc_rootfs *rootfs = &conf->rootfs;
> @@ -1548,12 +1486,106 @@ static int setup_rootfs(struct lxc_conf *conf)
>  	return 0;
>  }
>  
> +int prepare_ramfs_root(char *root)
> +{
> +	char buf[LINELEN], *p;
> +	char nroot[PATH_MAX];
> +	FILE *f;
> +	int i;
> +	char *p2;
> +
> +	if (realpath(root, nroot) == NULL)
> +		return -1;
> +
> +	if (chdir("/") == -1)
> +		return -1;
> +
> +	/*
> +	 * We could use here MS_MOVE, but in userns this mount is
> +	 * locked and can't be moved.
> +	 */
> +	if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
> +		SYSERROR("Failed to move %s into /", root);
> +		return -1;
> +	}
> +
> +	if (mount(".", NULL, NULL, MS_REC | MS_PRIVATE, NULL)) {
> +		SYSERROR("Failed to make . rprivate");
> +		return -1;
> +	}
> +
> +	/*
> +	 * The following code cleans up inhereted mounts which are not
> +	 * required for CT.
> +	 *
> +	 * The mountinfo file shows not all mounts, if a few points have been
> +	 * unmounted between read operations from the mountinfo. So we need to
> +	 * read mountinfo a few times.
> +	 *
> +	 * This loop can be skipped if a container uses unserns, because all
> +	 * inherited mounts are locked and we should live with all this trash.
> +	 */
> +	while (1) {
> +		int progress = 0;
> +
> +		f = fopen("./proc/self/mountinfo", "r");
> +		if (!f) {
> +			SYSERROR("Unable to open /proc/self/mountinfo");
> +			return -1;
> +		}
> +		while (fgets(buf, LINELEN, f)) {
> +			for (p = buf, i=0; p && i < 4; i++)
> +				p = strchr(p+1, ' ');
> +			if (!p)
> +				continue;
> +			p2 = strchr(p+1, ' ');
> +			if (!p2)
> +				continue;
> +
> +			*p2 = '\0';
> +			*p = '.';
> +
> +			if (strcmp(p + 1, "/") == 0)
> +				continue;
> +			if (strcmp(p + 1, "/proc") == 0)
> +				continue;
> +
> +			if (umount2(p, MNT_DETACH) == 0)
> +				progress++;
> +		}
> +		fclose(f);
> +		if (!progress)
> +			break;
> +	}
> +
> +	if (umount2("./proc", MNT_DETACH)) {
> +		SYSERROR("Unable to umount /proc");
> +		return -1;
> +	}
> +
> +	/* It is weird, but chdir("..") moves us in a new root */
> +	if (chdir("..") == -1) {
> +		SYSERROR("Unable to change working directory");
> +		return -1;
> +	}
> +
> +	if (chroot(".") == -1) {
> +		SYSERROR("Unable to chroot");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>  static int setup_pivot_root(const struct lxc_rootfs *rootfs)
>  {
>  	if (!rootfs->path)
>  		return 0;
>  
> -	if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
> +	if (detect_ramfs_rootfs()) {
> +		if (prepare_ramfs_root(rootfs->mount))
> +			return -1;
> +	} else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
>  		ERROR("failed to setup pivot root");
>  		return -1;
>  	}
> @@ -3952,14 +3984,6 @@ int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath
>  		}
>  	}
>  
> -	if (1 || detect_ramfs_rootfs()) {
> -			ERROR("Failed to chroot into slave /");
> -		if (chroot_into_slave(conf)) {
> -			ERROR("Failed to chroot into slave /");
> -			return -1;
> -		}
> -	}
> -
>  	remount_all_slave();
>  
>  	if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
> -- 
> 1.9.3
> 



More information about the lxc-devel mailing list