[lxc-devel] [PATCH] [RFC] lxc: don't call pivot_root if / is on a ramfs
Serge Hallyn
serge.hallyn at ubuntu.com
Wed Oct 8 16:04:55 UTC 2014
Quoting Andrey Wagin (avagin at gmail.com):
> 2014-10-08 18:44 GMT+04:00 Serge Hallyn <serge.hallyn at ubuntu.com>:
> > Quoting Andrew Vagin (avagin at gmail.com):
> >> Here is an updated patch.
> >>
> >> Now ct->rootfs is bind-mounted into /, so we don't need to "bind-mount
> >> the root onto itself first".
> >
> > Hm, interesting. I thought MS_MOVE was central to the approach.
>
> mount --rbind ct_rootfs ct_rootfs
> mount --move ct_rootfs /
>
> I think these two commands are equivalent to mount --rbind ct_rootfs /
>
> I have read the code once again. rootfs->mount is always a mount
> point, because it used with pivot_root. In this case we don't need to
> bind-mount it into itselt or into root. But we should not foget about
> userns, where this mount is locked and can't be moved to somewhere.
>
> Please, look at the attached patch. I guess it's the final version.
Thanks, I'll go with this.
Acked-by: Serge E. Hallyn <serge.hallyn at ubuntu.com>
> It remount all useless mount as private and contains a comment why we
> bind-mount the root instead of moving it.
>
> Thanks.
>
> >
> >> What is about "do the turn-into-slave first"? I don't know.
> >> remount_all_slave() is called from do_rootfs_setup(). Is it enough?
> >
> > Probably not, though one would *think* that any machine which has / on
> > ramfs doesn't have / ms_shared. Anyway I was thinking
> > remount_all_slave() should be called before your new nf.
> lxc_setup
> do_rootfs_setup()
> remount_all_slave()
> ...
> setup_pivot_root
> prepare_ramfs_root()
> From 49f365260cff8092f1343533868adabfbbd67580 Mon Sep 17 00:00:00 2001
> From: Andrey Vagin <avagin at gmail.com>
> Date: Sun, 5 Oct 2014 01:49:16 +0400
> Subject: [PATCH] [RFC] lxc: don't call pivot_root if / is on a ramfs
>
> pivot_root can't be called if / is on a ramfs. Currently chroot is
> called before pivot_root. In this case the standard well-known
> 'chroot escape' technique allows to escape a container.
>
> I think the best way to handle this situation is to make following actions:
> * clean all mounts, which should not be visible in CT
> * move CT's rootfs into /
> * make chroot into /
>
> I don't have a host, where / is on a ramfs, so I can't test this patch.
>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> src/lxc/conf.c | 166 +++++++++++++++++++++++++++++++++------------------------
> 1 file changed, 95 insertions(+), 71 deletions(-)
>
> diff --git a/src/lxc/conf.c b/src/lxc/conf.c
> index 2d7ced9..3662503 100644
> --- a/src/lxc/conf.c
> +++ b/src/lxc/conf.c
> @@ -1449,68 +1449,6 @@ int lxc_delete_autodev(struct lxc_handler *handler)
> return 0;
> }
>
> -/*
> - * I'll forgive you for asking whether all of this is needed :) The
> - * answer is yes.
> - * pivot_root will fail if the new root, the put_old dir, or the parent
> - * of current->fs->root are MS_SHARED. (parent of current->fs_root may
> - * or may not be current->fs_root - if we assumed it always was, we could
> - * just mount --make-rslave /). So,
> - * 1. mount a tiny tmpfs to be parent of current->fs->root.
> - * 2. make that MS_SLAVE
> - * 3. make a 'root' directory under that
> - * 4. mount --rbind / under the $tinyroot/root.
> - * 5. make that rslave
> - * 6. chdir and chroot into $tinyroot/root
> - * 7. $tinyroot will be unmounted by our parent in start.c
> - */
> -static int chroot_into_slave(struct lxc_conf *conf)
> -{
> - char path[MAXPATHLEN];
> - const char *destpath = conf->rootfs.mount;
> - int ret;
> -
> - if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
> - SYSERROR("failed to mount %s bind", destpath);
> - return -1;
> - }
> - if (mount("", destpath, NULL, MS_SLAVE, 0)) {
> - SYSERROR("failed to make %s slave", destpath);
> - return -1;
> - }
> - if (mount("none", destpath, "tmpfs", 0, "size=10000,mode=755")) {
> - SYSERROR("Failed to mount tmpfs / at %s", destpath);
> - return -1;
> - }
> - ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
> - if (ret < 0 || ret >= MAXPATHLEN) {
> - ERROR("out of memory making root path");
> - return -1;
> - }
> - if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
> - SYSERROR("Failed to create /dev/pts in container");
> - return -1;
> - }
> - if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
> - SYSERROR("Failed to rbind mount / to %s", path);
> - return -1;
> - }
> - if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
> - SYSERROR("Failed to make tmp-/ at %s rslave", path);
> - return -1;
> - }
> - if (chroot(path)) {
> - SYSERROR("Failed to chroot into tmp-/");
> - return -1;
> - }
> - if (chdir("/")) {
> - SYSERROR("Failed to chdir into tmp-/");
> - return -1;
> - }
> - INFO("Chrooted into tmp-/ at %s", path);
> - return 0;
> -}
> -
> static int setup_rootfs(struct lxc_conf *conf)
> {
> const struct lxc_rootfs *rootfs = &conf->rootfs;
> @@ -1548,12 +1486,106 @@ static int setup_rootfs(struct lxc_conf *conf)
> return 0;
> }
>
> +int prepare_ramfs_root(char *root)
> +{
> + char buf[LINELEN], *p;
> + char nroot[PATH_MAX];
> + FILE *f;
> + int i;
> + char *p2;
> +
> + if (realpath(root, nroot) == NULL)
> + return -1;
> +
> + if (chdir("/") == -1)
> + return -1;
> +
> + /*
> + * We could use here MS_MOVE, but in userns this mount is
> + * locked and can't be moved.
> + */
> + if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
> + SYSERROR("Failed to move %s into /", root);
> + return -1;
> + }
> +
> + if (mount(".", NULL, NULL, MS_REC | MS_PRIVATE, NULL)) {
> + SYSERROR("Failed to make . rprivate");
> + return -1;
> + }
> +
> + /*
> + * The following code cleans up inhereted mounts which are not
> + * required for CT.
> + *
> + * The mountinfo file shows not all mounts, if a few points have been
> + * unmounted between read operations from the mountinfo. So we need to
> + * read mountinfo a few times.
> + *
> + * This loop can be skipped if a container uses unserns, because all
> + * inherited mounts are locked and we should live with all this trash.
> + */
> + while (1) {
> + int progress = 0;
> +
> + f = fopen("./proc/self/mountinfo", "r");
> + if (!f) {
> + SYSERROR("Unable to open /proc/self/mountinfo");
> + return -1;
> + }
> + while (fgets(buf, LINELEN, f)) {
> + for (p = buf, i=0; p && i < 4; i++)
> + p = strchr(p+1, ' ');
> + if (!p)
> + continue;
> + p2 = strchr(p+1, ' ');
> + if (!p2)
> + continue;
> +
> + *p2 = '\0';
> + *p = '.';
> +
> + if (strcmp(p + 1, "/") == 0)
> + continue;
> + if (strcmp(p + 1, "/proc") == 0)
> + continue;
> +
> + if (umount2(p, MNT_DETACH) == 0)
> + progress++;
> + }
> + fclose(f);
> + if (!progress)
> + break;
> + }
> +
> + if (umount2("./proc", MNT_DETACH)) {
> + SYSERROR("Unable to umount /proc");
> + return -1;
> + }
> +
> + /* It is weird, but chdir("..") moves us in a new root */
> + if (chdir("..") == -1) {
> + SYSERROR("Unable to change working directory");
> + return -1;
> + }
> +
> + if (chroot(".") == -1) {
> + SYSERROR("Unable to chroot");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> static int setup_pivot_root(const struct lxc_rootfs *rootfs)
> {
> if (!rootfs->path)
> return 0;
>
> - if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
> + if (detect_ramfs_rootfs()) {
> + if (prepare_ramfs_root(rootfs->mount))
> + return -1;
> + } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
> ERROR("failed to setup pivot root");
> return -1;
> }
> @@ -3952,14 +3984,6 @@ int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath
> }
> }
>
> - if (1 || detect_ramfs_rootfs()) {
> - ERROR("Failed to chroot into slave /");
> - if (chroot_into_slave(conf)) {
> - ERROR("Failed to chroot into slave /");
> - return -1;
> - }
> - }
> -
> remount_all_slave();
>
> if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
> --
> 1.9.3
>
More information about the lxc-devel
mailing list