[lxc-devel] [PATCH 1/2] c/r: rework external mountpoint handling v3

Serge Hallyn serge.hallyn at ubuntu.com
Wed Apr 15 15:57:32 UTC 2015


Quoting Tycho Andersen (tycho.andersen at canonical.com):
> CRIU now supports autodetection of external mounts via the --ext-mount-map auto
> --enable-external-sharing --enable-external-masters options, so we don't need
> to explicitly pass the cgmanager mount or any of the mounts from the config.
> This also means that lxcfs mounts (since they are bind mounts from outside the
> container) are autodetected, meaning that c/r of containers using lxcfs works.
> 
> A further advantage of this patch is that it addresses some of the ugliness
> that was in the exec_criu() function. There are other criu options that will
> allow us to trim this even further, though.
> 
> Finally, with --enable-external-masters, criu understands slave mounts in the
> container with shared mounts in the peer group that are outside the namespace.
> This allows containers on a systemd host to be dumped and restored correctly.
> 
> However, these options have just landed in criu trunk today, and the next
> tagged release will be 1.6 on June 1, so we should avoid merging this into any
> stable releases until then.
> 
> v2: remount / as private before bind mounting the container's directory for
>     criu. The problem here is that if / is mounted as shared, even if we
>     unshare() the /var/lib/lxc/rootfs mountpoint propagates outside of our
>     mount namespace, which is bad, since we don't want to leak mounts. In
>     particular, this leak confuses criu the second time it goes to checkpoint
>     the container.
> 
> v3: whoops, we really want / as MS_SLAVE | MS_REC here, to match what start
>     does

Can you justify using MS_SLAVE?  (I'm not opposed, just wondering whether
there are specific reasons)

We probably do want MS_SLAVE so that any umount -l from the host is less
likely to leave a busy mount in the container pinning a device on the host.

re MS_REC I was thinking you were doing this bc / was going to be bind-mounted
into the container, but that's not the case, is it?  If you're doing this
so we can mount to our heart's content later on without worrying about the
host, then MS_REC is needed.

> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  src/lxc/lxccontainer.c | 90 +++++++++++---------------------------------------
>  1 file changed, 20 insertions(+), 70 deletions(-)
> 
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 3c3ff33..db11947 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -3708,18 +3708,18 @@ struct criu_opts {
>  static void exec_criu(struct criu_opts *opts)
>  {
>  	char **argv, log[PATH_MAX];
> -	int static_args = 14, argc = 0, i, ret;
> +	int static_args = 18, argc = 0, i, ret;
>  	int netnr = 0;
>  	struct lxc_list *it;
>  
> -	struct mntent mntent;
>  	char buf[4096];
>  	FILE *mnts = NULL;
>  
>  	/* The command line always looks like:
>  	 * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
>  	 * --manage-cgroups action-script foo.sh -D $(directory) \
> -	 * -o $(directory)/$(action).log
> +	 * -o $(directory)/$(action).log --ext-mount-map auto
> +	 * --enable-external-sharing --enable-external-masters
>  	 * +1 for final NULL */
>  
>  	if (strcmp(opts->action, "dump") == 0) {
> @@ -3746,11 +3746,6 @@ static void exec_criu(struct criu_opts *opts)
>  		return;
>  	}
>  
> -	// We need to tell criu where cgmanager's socket is bind mounted from
> -	// if it exists since it's external.
> -	if (cgroup_driver() == CGMANAGER)
> -		static_args+=2;
> -
>  	argv = malloc(static_args * sizeof(*argv));
>  	if (!argv)
>  		return;
> @@ -3780,6 +3775,10 @@ static void exec_criu(struct criu_opts *opts)
>  	DECLARE_ARG("--link-remap");
>  	DECLARE_ARG("--force-irmap");
>  	DECLARE_ARG("--manage-cgroups");
> +	DECLARE_ARG("--ext-mount-map");
> +	DECLARE_ARG("auto");
> +	DECLARE_ARG("--enable-external-sharing");
> +	DECLARE_ARG("--enable-external-masters");
>  	DECLARE_ARG("--action-script");
>  	DECLARE_ARG(DATADIR "/lxc/lxc-restore-net");
>  	DECLARE_ARG("-D");
> @@ -3790,35 +3789,9 @@ static void exec_criu(struct criu_opts *opts)
>  	if (opts->verbose)
>  		DECLARE_ARG("-vvvvvv");
>  
> -	/*
> -	 * Note: this macro is not intended to be called unless argc is equal
> -	 * to the length of the array; there is nothing that keeps track of the
> -	 * length of the array besides the location in the code that this is
> -	 * called. (Yes this is bad, and we should fix it.)
> -	 */
> -#define RESIZE_ARGS(additional) 						\
> -	do {									\
> -		void *m;							\
> -		if (additional < 0) {						\
> -			ERROR("resizing by negative amount");			\
> -			goto err;						\
> -		} else if (additional == 0)					\
> -			continue;						\
> -										\
> -		m = realloc(argv, (argc + additional + 1) * sizeof(*argv));	\
> -		if (!m)								\
> -			goto err;						\
> -		argv = m;							\
> -	} while (0)
> -
>  	if (strcmp(opts->action, "dump") == 0) {
>  		char pid[32];
>  
> -		if (cgroup_driver() == CGMANAGER) {
> -			DECLARE_ARG("--ext-mount-map");
> -			DECLARE_ARG("/sys/fs/cgroup/cgmanager:cgmanager");
> -		}
> -
>  		if (sprintf(pid, "%d", lxcapi_init_pid(opts->c)) < 0)
>  			goto err;
>  
> @@ -3827,11 +3800,8 @@ static void exec_criu(struct criu_opts *opts)
>  		if (!opts->stop)
>  			DECLARE_ARG("--leave-running");
>  	} else if (strcmp(opts->action, "restore") == 0) {
> -
> -		if (cgroup_driver() == CGMANAGER) {
> -			DECLARE_ARG("--ext-mount-map");
> -			DECLARE_ARG("cgmanager:/sys/fs/cgroup/cgmanager");
> -		}
> +		void *m;
> +		int additional;
>  
>  		DECLARE_ARG("--root");
>  		DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
> @@ -3842,7 +3812,12 @@ static void exec_criu(struct criu_opts *opts)
>  		DECLARE_ARG("--cgroup-root");
>  		DECLARE_ARG(opts->cgroup_path);
>  
> -		RESIZE_ARGS(lxc_list_len(&opts->c->lxc_conf->network) * 2);
> +		additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
> +
> +		m = realloc(argv, (argc + additional + 1) * sizeof(*argv));	\
> +		if (!m)								\
> +			goto err;						\
> +		argv = m;
>  
>  		lxc_list_for_each(it, &opts->c->lxc_conf->network) {
>  			char eth[128], *veth;
> @@ -3864,37 +3839,8 @@ static void exec_criu(struct criu_opts *opts)
>  			DECLARE_ARG("--veth-pair");
>  			DECLARE_ARG(buf);
>  		}
> -	}
> -
> -	// CRIU wants to know about any external bind mounts the
> -	// container has.
> -	mnts = write_mount_file(&opts->c->lxc_conf->mount_list);
> -	if (!mnts)
> -		goto err;
> -
> -	RESIZE_ARGS(lxc_list_len(&opts->c->lxc_conf->mount_list) * 2);
> -
> -	while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
> -		char arg[2048], *key, *val;
> -		int ret;
> -
> -		if (strcmp(opts->action, "dump") == 0) {
> -			key = mntent.mnt_fsname;
> -			val = mntent.mnt_dir;
> -		} else {
> -			key = mntent.mnt_dir;
> -			val = mntent.mnt_fsname;
> -		}
> -
> -		ret = snprintf(arg, sizeof(arg), "%s:%s", key, val);
> -		if (ret < 0 || ret >= sizeof(arg)) {
> -			goto err;
> -		}
>  
> -		DECLARE_ARG("--ext-mount-map");
> -		DECLARE_ARG(arg);
>  	}
> -	fclose(mnts);
>  
>  	argv[argc] = NULL;
>  
> @@ -3932,7 +3878,6 @@ static void exec_criu(struct criu_opts *opts)
>  	}
>  
>  #undef DECLARE_ARG
> -#undef RESIZE_ARGS
>  	execv(argv[0], argv);
>  err:
>  	if (mnts)
> @@ -4176,6 +4121,11 @@ static void do_restore(struct lxc_container *c, int pipe, char *directory, bool
>  			if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
>  				goto out_fini_handler;
>  
> +			if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
> +				SYSERROR("remount / to private failed");
> +				goto out_fini_handler;
> +			}
> +
>  			if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
>  				rmdir(rootfs->mount);
>  				goto out_fini_handler;
> -- 
> 2.1.4
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel


More information about the lxc-devel mailing list