[lxc-devel] [PATCH] lxcapi_restore shouldn't steal the calling process

Tycho Andersen tycho.andersen at canonical.com
Thu Mar 26 14:52:15 UTC 2015


On Wed, Mar 25, 2015 at 11:05:23AM -0600, Tycho Andersen wrote:
> Previously, lxcapi_restore used the calling process as the lxc monitor process
> (and just never returned), requiring users to fork before calling it. This, of
> course, would cause problems for things like LXD, which can't fork.
> 
> Now, restore() forks the monitor as a child of the process that calls it. Users
> who want to daemonize the restore process need to fork themselves.
> lxc-checkpoint has been updated to reflect this behavior change.
> 
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  src/lxc/lxc_checkpoint.c |  48 +++++++++++++++------
>  src/lxc/lxccontainer.c   | 109 ++++++++++++++++++++++++++++++++++-------------
>  2 files changed, 114 insertions(+), 43 deletions(-)
> 
> diff --git a/src/lxc/lxc_checkpoint.c b/src/lxc/lxc_checkpoint.c
> index cfa08fc..2e76c2e 100644
> --- a/src/lxc/lxc_checkpoint.c
> +++ b/src/lxc/lxc_checkpoint.c
> @@ -20,6 +20,8 @@
>  #include <stdio.h>
>  #include <errno.h>
>  #include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
>  
>  #include <lxc/lxccontainer.h>
>  
> @@ -27,6 +29,7 @@
>  #include "config.h"
>  #include "lxc.h"
>  #include "arguments.h"
> +#include "utils.h"
>  
>  static char *checkpoint_dir = NULL;
>  static bool stop = false;
> @@ -139,36 +142,53 @@ bool checkpoint(struct lxc_container *c)
>  	return true;
>  }
>  
> -bool restore(struct lxc_container *c)
> +bool restore_finalize(struct lxc_container *c)
>  {
> -	pid_t pid = 0;
> -	bool ret = true;
> +	bool ret = c->restore(c, checkpoint_dir, verbose);
> +	if (!ret) {
> +		fprintf(stderr, "Restoring %s failed.\n", my_args.name);
> +	}
>  
> +	lxc_container_put(c);
> +	return ret;
> +}
> +
> +bool restore(struct lxc_container *c)
> +{
>  	if (c->is_running(c)) {
>  		fprintf(stderr, "%s is running, not restoring.\n", my_args.name);
>  		lxc_container_put(c);
>  		return false;
>  	}
>  
> -	if (my_args.daemonize)
> +	if (my_args.daemonize) {
> +		pid_t pid;
> +
>  		pid = fork();
> +		if (pid < 0) {
> +			perror("fork");
> +			return false;
> +		}
>  
> -	if (pid == 0) {
> -		if (my_args.daemonize) {
> +		if (pid == 0) {
>  			close(0);
>  			close(1);
> -		}
>  
> -		ret = c->restore(c, checkpoint_dir, verbose);
> -
> -		if (!ret) {
> -			fprintf(stderr, "Restoring %s failed.\n", my_args.name);
> +			exit(!restore_finalize(c));
> +		} else {
> +			return wait_for_pid(pid) == 0;
>  		}
> -	}
> +	} else {
> +		int status;
>  
> -	lxc_container_put(c);
> +		if (!restore_finalize(c))
> +			return false;
>  
> -	return ret;
> +		if (waitpid(-1, &status, 0) < 0)
> +			return false;
> +
> +		return WIFEXITED(status) && WEXITSTATUS(status) == 0;
> +	}
>  }
>  
>  int main(int argc, char *argv[])
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 4422f4a..ecea7bf 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -3853,28 +3853,20 @@ out_unlock:
>  	return !has_error;
>  }
>  
> -static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbose)
> +// do_restore never returns, the calling process is used as the
> +// monitor process. do_restore calls exit() if it fails.
> +static void do_restore(struct lxc_container *c, int pipe, char *directory, bool verbose)
>  {
>  	pid_t pid;
> -	struct lxc_rootfs *rootfs;
>  	char pidfile[L_tmpnam];
>  	struct lxc_handler *handler;
> -	bool has_error = true;
> -
> -	if (!criu_ok(c))
> -		return false;
> -
> -	if (geteuid()) {
> -		ERROR("Must be root to restore\n");
> -		return false;
> -	}
>  
>  	if (!tmpnam(pidfile))
> -		return false;
> +		exit(1);
>  
>  	handler = lxc_init(c->name, c->lxc_conf, c->config_path);
>  	if (!handler)
> -		return false;
> +		exit(1);
>  
>  	if (!cgroup_init(handler)) {
>  		ERROR("failed initing cgroups");
> @@ -3897,9 +3889,10 @@ static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbos
>  
>  	if (pid == 0) {
>  		struct criu_opts os;
> +		struct lxc_rootfs *rootfs;
>  
>  		if (unshare(CLONE_NEWNS))
> -			exit(1);
> +			goto out_fini_handler;
>  
>  		/* CRIU needs the lxc root bind mounted so that it is the root of some
>  		 * mount. */
> @@ -3907,15 +3900,14 @@ static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbos
>  
>  		if (rootfs_is_blockdev(c->lxc_conf)) {
>  			if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
> -				exit(1);
> -		}
> -		else {
> +				goto out_fini_handler;
> +		} else {
>  			if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
> -				exit(1);
> +				goto out_fini_handler;
>  
>  			if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
>  				rmdir(rootfs->mount);
> -				exit(1);
> +				goto out_fini_handler;
>  			}
>  		}
>  
> @@ -3930,22 +3922,28 @@ static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbos
>  		exec_criu(&os);
>  		umount(rootfs->mount);
>  		rmdir(rootfs->mount);
> -		exit(1);
> +		goto out_fini_handler;
>  	} else {
>  		int status;
> +		char title[2048];
> +		int ret;
>  
>  		pid_t w = waitpid(pid, &status, 0);
> -
>  		if (w == -1) {
>  			perror("waitpid");
>  			goto out_fini_handler;
>  		}
>  
> +		if (sizeof(status) != write(pipe, &status, sizeof(status))) {
> +			perror("write");
> +			ERROR("failed to write all of status");
> +			goto out_fini_handler;
> +		}
> +
>  		if (WIFEXITED(status)) {
>  			if (WEXITSTATUS(status)) {
>  				goto out_fini_handler;
> -			}
> -			else {
> +			} else {
>  				int ret;
>  				FILE *f = fopen(pidfile, "r");
>  				if (!f) {
> @@ -3969,17 +3967,70 @@ static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbos
>  			goto out_fini_handler;
>  		}
>  
> -		if (lxc_poll(c->name, handler)) {
> +		/*
> +		 * See comment in lxcapi_start; we don't care if these
> +		 * fail because it's just a beauty thing. We just
> +		 * assign the return here to silence potential.
> +		 */
> +		ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
> +		ret = setproctitle(title);
> +
> +		ret = lxc_poll(c->name, handler);
> +		if (ret)
>  			lxc_abort(c->name, handler);
> -			goto out_fini_handler;
> -		}
> +		lxc_fini(c->name, handler);
> +		exit(ret);
>  	}
>  
> -	has_error = false;
> -
>  out_fini_handler:
>  	lxc_fini(c->name, handler);
> -	return !has_error;
> +	exit(1);
> +}
> +
> +static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbose)
> +{
> +	pid_t pid;
> +	int status;
> +	int pipefd[2];
> +
> +	if (!criu_ok(c))
> +		return false;
> +
> +	if (geteuid()) {
> +		ERROR("Must be root to restore\n");
> +		return false;
> +	}
> +
> +	if (pipe(pipefd)) {

d'oh, looks like I forgot to close the pipe everywhere. I'll resend
with that in a bit.

> +		ERROR("failed to create pipe");
> +		return false;
> +	}
> +
> +	pid = fork();
> +	if (pid < 0)
> +		return false;
> +
> +	if (pid == 0) {
> +		// this never returns
> +		do_restore(c, pipefd[1], directory, verbose);
> +	}
> +
> +	if (sizeof(status) != read(pipefd[0], &status, sizeof(status))) {
> +		ERROR("reading status from pipe failed");
> +		goto err_wait;
> +	}
> +
> +	// If the criu process was killed or exited nonzero, wait() for the
> +	// handler, since the restore process died. Otherwise, we don't need to
> +	// wait, since the child becomes the monitor process.
> +	if (!WIFEXITED(status) || WEXITSTATUS(status))
> +		goto err_wait;
> +	return true;
> +
> +err_wait:
> +	if (wait_for_pid(pid))
> +		ERROR("restore process died");
> +	return false;
>  }
>  
>  static int lxcapi_attach_run_waitl(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char *arg, ...)
> -- 
> 2.1.0
> 


More information about the lxc-devel mailing list