[lxc-devel] [PATCH] c/r: rework external mountpoint handling v2
Serge Hallyn
serge.hallyn at ubuntu.com
Wed Apr 15 15:48:10 UTC 2015
Quoting Tycho Andersen (tycho.andersen at canonical.com):
> CRIU now supports autodetection of external mounts via the --ext-mount-map auto
> --enable-external-sharing --enable-external-masters options, so we don't need
> to explicitly pass the cgmanager mount or any of the mounts from the config.
> This also means that lxcfs mounts (since they are bind mounts from outside the
> container) are autodetected, meaning that c/r of containers using lxcfs works.
>
> A further advantage of this patch is that it addresses some of the ugliness
> that was in the exec_criu() function. There are other criu options that will
> allow us to trim this even further, though.
>
> Finally, with --enable-external-masters, criu understands slave mounts in the
> container with shared mounts in the peer group that are outside the namespace.
> This allows containers on a systemd host to be dumped and restored correctly.
>
> However, these options have just landed in criu trunk today, and the next
> tagged release will be 1.6 on June 1, so we should avoid merging this into any
Ouch, June 1, that's a ways away.
> stable releases until then.
Should there be a lxc_check_criu_version() fn, updated in cases like these,
which ensures that criu is new enough version for the lxc version?
Given the heavy criu development I do NOT think we should fallback to
different behavior on older versions - just require the minversion we
need and fail with a nice informational message otherwise.
> v2: remount / as private before bind mounting the container's directory for
It's probably a whacky enough case that lxc+criu would fail anyway, but
it *is* possible for / to not be a mountpoint. Init could chroot you
instead of pivot-rooting. In that case you'd have to bind-mount / onto
itself before doing the MS_PRIVATE remount.
I'm also kind of surprised it worked without a MS_REMOUNT flag, but <shrug>
> criu. The problem here is that if / is mounted as shared, even if we
> unshare() the /var/lib/lxc/rootfs mountpoint propagates outside of our
> mount namespace, which is bad, since we don't want to leak mounts. In
> particular, this leak confuses criu the second time it goes to checkpoint
> the container.
>
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
Acked-by: Serge E. Hallyn <serge.hallyn at ubuntu.com>
But where do we keep this? We need a github cronjob to post merge request
on a given date.
> ---
> src/lxc/lxccontainer.c | 89 ++++++++++++--------------------------------------
> 1 file changed, 20 insertions(+), 69 deletions(-)
>
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 3c3ff33..d759703 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -3708,18 +3708,18 @@ struct criu_opts {
> static void exec_criu(struct criu_opts *opts)
> {
> char **argv, log[PATH_MAX];
> - int static_args = 14, argc = 0, i, ret;
> + int static_args = 18, argc = 0, i, ret;
> int netnr = 0;
> struct lxc_list *it;
>
> - struct mntent mntent;
> char buf[4096];
> FILE *mnts = NULL;
>
> /* The command line always looks like:
> * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
> * --manage-cgroups action-script foo.sh -D $(directory) \
> - * -o $(directory)/$(action).log
> + * -o $(directory)/$(action).log --ext-mount-map auto
> + * --enable-external-sharing --enable-external-masters
> * +1 for final NULL */
>
> if (strcmp(opts->action, "dump") == 0) {
> @@ -3746,11 +3746,6 @@ static void exec_criu(struct criu_opts *opts)
> return;
> }
>
> - // We need to tell criu where cgmanager's socket is bind mounted from
> - // if it exists since it's external.
> - if (cgroup_driver() == CGMANAGER)
> - static_args+=2;
> -
> argv = malloc(static_args * sizeof(*argv));
> if (!argv)
> return;
> @@ -3780,6 +3775,10 @@ static void exec_criu(struct criu_opts *opts)
> DECLARE_ARG("--link-remap");
> DECLARE_ARG("--force-irmap");
> DECLARE_ARG("--manage-cgroups");
> + DECLARE_ARG("--ext-mount-map");
> + DECLARE_ARG("auto");
> + DECLARE_ARG("--enable-external-sharing");
> + DECLARE_ARG("--enable-external-masters");
> DECLARE_ARG("--action-script");
> DECLARE_ARG(DATADIR "/lxc/lxc-restore-net");
> DECLARE_ARG("-D");
> @@ -3790,35 +3789,9 @@ static void exec_criu(struct criu_opts *opts)
> if (opts->verbose)
> DECLARE_ARG("-vvvvvv");
>
> - /*
> - * Note: this macro is not intended to be called unless argc is equal
> - * to the length of the array; there is nothing that keeps track of the
> - * length of the array besides the location in the code that this is
> - * called. (Yes this is bad, and we should fix it.)
> - */
> -#define RESIZE_ARGS(additional) \
> - do { \
> - void *m; \
> - if (additional < 0) { \
> - ERROR("resizing by negative amount"); \
> - goto err; \
> - } else if (additional == 0) \
> - continue; \
> - \
> - m = realloc(argv, (argc + additional + 1) * sizeof(*argv)); \
> - if (!m) \
> - goto err; \
> - argv = m; \
> - } while (0)
> -
> if (strcmp(opts->action, "dump") == 0) {
> char pid[32];
>
> - if (cgroup_driver() == CGMANAGER) {
> - DECLARE_ARG("--ext-mount-map");
> - DECLARE_ARG("/sys/fs/cgroup/cgmanager:cgmanager");
> - }
> -
> if (sprintf(pid, "%d", lxcapi_init_pid(opts->c)) < 0)
> goto err;
>
> @@ -3827,11 +3800,8 @@ static void exec_criu(struct criu_opts *opts)
> if (!opts->stop)
> DECLARE_ARG("--leave-running");
> } else if (strcmp(opts->action, "restore") == 0) {
> -
> - if (cgroup_driver() == CGMANAGER) {
> - DECLARE_ARG("--ext-mount-map");
> - DECLARE_ARG("cgmanager:/sys/fs/cgroup/cgmanager");
> - }
> + void *m;
> + int additional;
>
> DECLARE_ARG("--root");
> DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
> @@ -3842,7 +3812,12 @@ static void exec_criu(struct criu_opts *opts)
> DECLARE_ARG("--cgroup-root");
> DECLARE_ARG(opts->cgroup_path);
>
> - RESIZE_ARGS(lxc_list_len(&opts->c->lxc_conf->network) * 2);
> + additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
> +
> + m = realloc(argv, (argc + additional + 1) * sizeof(*argv)); \
> + if (!m) \
> + goto err; \
> + argv = m;
>
> lxc_list_for_each(it, &opts->c->lxc_conf->network) {
> char eth[128], *veth;
> @@ -3864,37 +3839,8 @@ static void exec_criu(struct criu_opts *opts)
> DECLARE_ARG("--veth-pair");
> DECLARE_ARG(buf);
> }
> - }
> -
> - // CRIU wants to know about any external bind mounts the
> - // container has.
> - mnts = write_mount_file(&opts->c->lxc_conf->mount_list);
> - if (!mnts)
> - goto err;
> -
> - RESIZE_ARGS(lxc_list_len(&opts->c->lxc_conf->mount_list) * 2);
> -
> - while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
> - char arg[2048], *key, *val;
> - int ret;
> -
> - if (strcmp(opts->action, "dump") == 0) {
> - key = mntent.mnt_fsname;
> - val = mntent.mnt_dir;
> - } else {
> - key = mntent.mnt_dir;
> - val = mntent.mnt_fsname;
> - }
> -
> - ret = snprintf(arg, sizeof(arg), "%s:%s", key, val);
> - if (ret < 0 || ret >= sizeof(arg)) {
> - goto err;
> - }
>
> - DECLARE_ARG("--ext-mount-map");
> - DECLARE_ARG(arg);
> }
> - fclose(mnts);
>
> argv[argc] = NULL;
>
> @@ -4176,6 +4122,11 @@ static void do_restore(struct lxc_container *c, int pipe, char *directory, bool
> if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
> goto out_fini_handler;
>
> + if (mount(NULL, "/", NULL, MS_PRIVATE, NULL) < 0) {
> + SYSERROR("remount / to private failed");
> + goto out_fini_handler;
> + }
> +
> if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
> rmdir(rootfs->mount);
> goto out_fini_handler;
> --
> 2.1.4
>
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel
More information about the lxc-devel
mailing list