[lxc-devel] [RFC 1/2] lxc-start: added --start-frozen

Serge Hallyn serge.hallyn at ubuntu.com
Tue Jan 19 16:54:59 UTC 2016


Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> On Mon, Jan 18, 2016 at 11:18:32PM +0000, Serge Hallyn wrote:
> > Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> > > --- a/src/lxc/lxccontainer.h
> > > +++ b/src/lxc/lxccontainer.h
> > > @@ -245,6 +245,16 @@ struct lxc_container {
> > >  	bool (*want_close_all_fds)(struct lxc_container *c, bool state);
> > >  
> > >  	/*!
> > > +	 * \brief Specify whether the container wishes start in a frozen state.
> > > +	 *
> > > +	 * \param c Container.
> > > +	 * \param state Value for the start_frozen bit (0 or 1).
> > > +	 *
> > > +	 * \return \c true on success, else \c false.
> > > +	 */
> > > +	bool (*set_start_frozen)(struct lxc_container *c, bool state);
> > 
> > Hi,
> > 
> > new functions must go at the end so as not to force linked applications
> > to be relinked.
> 
> Does this also hold for cgroup_ops? (That would look ugly though...)

No - the reason it applies for lxc_container is because other software
calls its members.  That doesn't happen with cgroup_ops.

> > > +
> > > +	/*!
> > >  	 * \brief Return current config file name.
> > >  	 *
> > >  	 * \param c Container.
> > > diff --git a/src/lxc/start.c b/src/lxc/start.c
> > > index 79dbe33..84a05f9 100644
> > > --- a/src/lxc/start.c
> > > +++ b/src/lxc/start.c
> > > @@ -1343,6 +1343,23 @@ static int start(struct lxc_handler *handler, void* data)
> > >  {
> > >  	struct start_args *arg = data;
> > >  
> > > +	if (handler->conf->start_frozen) {
> > > +		char *cgfile = NULL;
> > > +		NOTICE("freezing '%s'", arg->argv[0]);
> > > +		/* Let the monitor know we reached this point. */
> > > +		lxc_sync_fini_child(handler);
> > > +		if (asprintf(&cgfile, "/sys/fs/cgroup/freezer/lxc/%s/freezer.state", handler->name) == -1) {
> > > +			ERROR("failed to allocate memory trying to freezed container '%s'", handler->name);
> > > +			return -1;
> > > +		}
> > 
> > Hm, so this is done fromthe container's namespaces, and makes
> > assumptions about the cgroup layout.  Won't work for unprivileged
> > or nested containers (without cgroup namespace).
> >
> > Depending on how important this is for you, it might be worth
> > putting a sync point here to have the parent move the child.
> > Except how will the child get to executing init?  Can you use
> > ptrace somehow, have the parent step through until the child's
> > exec() and then freeze it and release the ptrace?
> 
> From our perspective executing init isn't really an issue as we don't
> see a reason to distinguish between the process before and after the
> exec() call. Freezing before exec() might even be better since it sort

What is your goal for this?  To strace?

> of guarantees the init process hasn't started touching the container
> yet, so I made a variant of the patch with an additional sync point.
> 
> I'm attaching a new version inline stripped off doc changes and signoff
> since it's most likely not final yet anyway.
> 
> I also came across another possible bug/cleanup situation, see the
> src/lxc/start.c changes: if the LXC_SYNC_POST_CGROUP barrier fails it
> returns rather than doing a `goto out_delete_net` like the calls above
> it, also the calls below revert back to `goto out_abort` and should
> probably be out_delete_net, too? (probably happend due to patch-contexts
> given that git-blame shows many authors for small groups of lines).
> 
> New patch:
> ---
> From 43cc837c1782ef5c47e455922e5e03e6aefee1ee Mon Sep 17 00:00:00 2001
> From: Wolfgang Bumiller <w.bumiller at proxmox.com>
> Date: Thu, 14 Jan 2016 11:09:06 +0100
> Subject: [PATCH] lxc-start: added --start-frozen
> 
> Add the possibility to start a container in a frozen state.
> 
> ---
> 
> diff --git a/src/lxc/arguments.h b/src/lxc/arguments.h
> index 898a9ce..066754b 100644
> --- a/src/lxc/arguments.h
> +++ b/src/lxc/arguments.h
> @@ -84,6 +84,9 @@ struct lxc_arguments {
>  	/* close fds from parent? */
>  	int close_all_fds;
>  
> +	/* freeze before starting init */
> +	int start_frozen;
> +
>  	/* lxc-create */
>  	char *bdevtype, *configfile, *template;
>  	char *fstype;
> diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c
> index da88bb7..ee0b92b 100644
> --- a/src/lxc/cgfs.c
> +++ b/src/lxc/cgfs.c
> @@ -2392,7 +2392,7 @@ out:
>  	return ret;
>  }
>  
> -static bool cgfs_unfreeze(void *hdata)
> +static bool cgfs_freezer_set(void *hdata, const char *state)
>  {
>  	struct cgfs_data *d = hdata;
>  	char *cgabspath, *cgrelpath;
> @@ -2406,11 +2406,21 @@ static bool cgfs_unfreeze(void *hdata)
>  	if (!cgabspath)
>  		return false;
>  
> -	ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
> +	ret = do_cgroup_set(cgabspath, "freezer.state", state);
>  	free(cgabspath);
>  	return ret == 0;
>  }
>  
> +static bool cgfs_freeze(void *hdata)
> +{
> +	return cgfs_freezer_set(hdata, "FROZEN");
> +}
> +
> +static bool cgfs_unfreeze(void *hdata)
> +{
> +	return cgfs_freezer_set(hdata, "THAWED");
> +}
> +
>  static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
>  				  bool with_devices)
>  {
> @@ -2460,6 +2470,7 @@ static struct cgroup_ops cgfs_ops = {
>  	.escape = cgfs_escape,
>  	.get = lxc_cgroupfs_get,
>  	.set = lxc_cgroupfs_set,
> +	.freeze = cgfs_freeze,
>  	.unfreeze = cgfs_unfreeze,
>  	.setup_limits = cgroupfs_setup_limits,
>  	.name = "cgroupfs",
> diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
> index 614f6e7..f9a7ed6 100644
> --- a/src/lxc/cgmanager.c
> +++ b/src/lxc/cgmanager.c
> @@ -1460,7 +1460,7 @@ struct cgroup_ops *cgm_ops_init(void)
>  }
>  
>  /* unfreeze is called by the command api after killing a container.  */
> -static bool cgm_unfreeze(void *hdata)
> +static bool cgm_freezer_set(void *hdata, const char *state)
>  {
>  	struct cgm_data *d = hdata;
>  	bool ret = true;
> @@ -1473,7 +1473,7 @@ static bool cgm_unfreeze(void *hdata)
>  		return false;
>  	}
>  	if (cgmanager_set_value_sync(NULL, cgroup_manager, "freezer", d->cgroup_path,
> -			"freezer.state", "THAWED") != 0) {
> +			"freezer.state", state) != 0) {
>  		NihError *nerr;
>  		nerr = nih_error_get();
>  		ERROR("call to cgmanager_set_value_sync failed: %s", nerr->message);
> @@ -1485,6 +1485,16 @@ static bool cgm_unfreeze(void *hdata)
>  	return ret;
>  }
>  
> +static bool cgm_freeze(void *hdata)
> +{
> +	return cgm_freezer_set(hdata, "FROZEN");
> +}
> +
> +static bool cgm_unfreeze(void *hdata)
> +{
> +	return cgm_freezer_set(hdata, "THAWED");
> +}
> +
>  static bool cgm_setup_limits(void *hdata, struct lxc_list *cgroup_settings, bool do_devices)
>  {
>  	struct cgm_data *d = hdata;
> @@ -1666,6 +1676,7 @@ static struct cgroup_ops cgmanager_ops = {
>  	.escape = cgm_escape,
>  	.get = cgm_get,
>  	.set = cgm_set,
> +	.freeze = cgm_freeze,
>  	.unfreeze = cgm_unfreeze,
>  	.setup_limits = cgm_setup_limits,
>  	.name = "cgmanager",
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index 8954733..380447c 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -136,6 +136,13 @@ bool cgroup_unfreeze(struct lxc_handler *handler)
>  	return false;
>  }
>  
> +bool cgroup_freeze(struct lxc_handler *handler)
> +{
> +	if (ops)
> +		return ops->freeze(handler->cgroup_data);
> +	return false;
> +}
> +
>  bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
>  {
>  	if (ops)
> diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
> index e3d3ce4..df808d1 100644
> --- a/src/lxc/cgroup.h
> +++ b/src/lxc/cgroup.h
> @@ -50,6 +50,7 @@ struct cgroup_ops {
>  	bool (*escape)(void);
>  	int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath);
>  	int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
> +	bool (*freeze)(void *hdata);
>  	bool (*unfreeze)(void *hdata);
>  	bool (*setup_limits)(void *hdata, struct lxc_list *cgroup_conf, bool with_devices);
>  	bool (*chown)(void *hdata, struct lxc_conf *conf);
> @@ -78,6 +79,7 @@ extern bool cgroup_escape(void);
>   * Currently, this call  only makes sense for privileged containers.
>   */
>  extern const char *cgroup_canonical_path(struct lxc_handler *handler);
> +extern bool cgroup_freeze(struct lxc_handler *handler);
>  extern bool cgroup_unfreeze(struct lxc_handler *handler);
>  extern void cgroup_disconnect(void);
>  extern cgroup_driver_t cgroup_driver(void);
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index b0274ec..5c84f5f 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -312,6 +312,7 @@ struct lxc_conf {
>  	struct lxc_rootfs rootfs;
>  	char *ttydir;
>  	int close_all_fds;
> +	int start_frozen;
>  	struct lxc_list hooks[NUM_LXC_HOOKS];
>  
>  	char *lsm_aa_profile;
> diff --git a/src/lxc/lxc_start.c b/src/lxc/lxc_start.c
> index 6b942ac..73e27e3 100644
> --- a/src/lxc/lxc_start.c
> +++ b/src/lxc/lxc_start.c
> @@ -53,6 +53,7 @@
>  #define OPT_SHARE_NET OPT_USAGE+1
>  #define OPT_SHARE_IPC OPT_USAGE+2
>  #define OPT_SHARE_UTS OPT_USAGE+3
> +#define OPT_START_FROZEN OPT_USAGE+4
>  
>  lxc_log_define(lxc_start_ui, lxc);
>  
> @@ -154,6 +155,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg)
>  	case OPT_SHARE_NET: args->share_ns[LXC_NS_NET] = arg; break;
>  	case OPT_SHARE_IPC: args->share_ns[LXC_NS_IPC] = arg; break;
>  	case OPT_SHARE_UTS: args->share_ns[LXC_NS_UTS] = arg; break;
> +	case OPT_START_FROZEN: args->start_frozen = 1; break;
>  	}
>  	return 0;
>  }
> @@ -170,6 +172,7 @@ static const struct option my_longopts[] = {
>  	{"share-net", required_argument, 0, OPT_SHARE_NET},
>  	{"share-ipc", required_argument, 0, OPT_SHARE_IPC},
>  	{"share-uts", required_argument, 0, OPT_SHARE_UTS},
> +	{"start-frozen", no_argument, 0, OPT_START_FROZEN},
>  	LXC_COMMON_OPTIONS
>  };
>  
> @@ -193,6 +196,7 @@ Options :\n\
>                           Note: --daemon implies --close-all-fds\n\
>    -s, --define KEY=VAL   Assign VAL to configuration variable KEY\n\
>        --share-[net|ipc|uts]=NAME Share a namespace with another container or pid\n\
> +      --start-frozen     Freeze the container before starting its init\n\
>  ",
>  	.options   = my_longopts,
>  	.parser    = my_parser,
> @@ -335,6 +339,9 @@ int main(int argc, char *argv[])
>  	if (my_args.close_all_fds)
>  		c->want_close_all_fds(c, true);
>  
> +	if (my_args.start_frozen)
> +		c->set_start_frozen(c, true);
> +
>  	if (args == default_args)
>  		err = c->start(c, 0, NULL) ? 0 : 1;
>  	else
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 490a59d..d1147f6 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -591,6 +591,21 @@ static bool do_lxcapi_want_close_all_fds(struct lxc_container *c, bool state)
>  
>  WRAP_API_1(bool, lxcapi_want_close_all_fds, bool)
>  
> +static bool do_lxcapi_set_start_frozen(struct lxc_container *c, bool state)
> +{
> +	if (!c || !c->lxc_conf)
> +		return false;
> +	if (container_mem_lock(c)) {
> +		ERROR("Error getting mem lock");
> +		return false;
> +	}
> +	c->lxc_conf->start_frozen = state;
> +	container_mem_unlock(c);
> +	return true;
> +}
> +
> +WRAP_API_1(bool, lxcapi_set_start_frozen, bool)
> +
>  static bool do_lxcapi_wait(struct lxc_container *c, const char *state, int timeout)
>  {
>  	int ret;
> @@ -4096,6 +4111,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
>  	c->load_config = lxcapi_load_config;
>  	c->want_daemonize = lxcapi_want_daemonize;
>  	c->want_close_all_fds = lxcapi_want_close_all_fds;
> +	c->set_start_frozen = lxcapi_set_start_frozen;
>  	c->start = lxcapi_start;
>  	c->startl = lxcapi_startl;
>  	c->stop = lxcapi_stop;
> diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h
> index 0755901..705eede 100644
> --- a/src/lxc/lxccontainer.h
> +++ b/src/lxc/lxccontainer.h
> @@ -823,6 +823,16 @@ struct lxc_container {
>  	 * \return \c 0 on success, nonzero on failure.
>  	 */
>  	int (*migrate)(struct lxc_container *c, unsigned int cmd, struct migrate_opts *opts, unsigned int size);
> +
> +	/*!
> +	 * \brief Specify whether the container wishes start in a frozen state.
> +	 *
> +	 * \param c Container.
> +	 * \param state Value for the start_frozen bit (0 or 1).
> +	 *
> +	 * \return \c true on success, else \c false.
> +	 */
> +	bool (*set_start_frozen)(struct lxc_container *c, bool state);
>  };
>  
>  /*!
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 06bfd4b..d52e1d0 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -1156,14 +1156,12 @@ static int lxc_spawn(struct lxc_handler *handler)
>  	}
>  
>  	/* Tell the child to complete its initialization and wait for
> -	 * it to exec or return an error.  (the child will never
> -	 * return LXC_SYNC_POST_CGROUP+1.  It will either close the
> -	 * sync pipe, causing lxc_sync_barrier_child to return
> -	 * success, or return a different value, causing us to error
> -	 * out).
> +	 * it to exec, wait to be frozen or return an error. (on error
> +	 * the child sends an unexpected sequence number that is not
> +	 * LXC_SYNC_POTS_CGROUP+1.)
>  	 */
>  	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
> -		return -1;
> +		goto out_delete_net;

Yup, looks like a bug right now, thanks.

>  
>  	if (detect_shared_rootfs())
>  		umount2(handler->conf->rootfs.mount, MNT_DETACH);
> @@ -1177,6 +1175,13 @@ static int lxc_spawn(struct lxc_handler *handler)
>  		goto out_abort;
>  	}
>  
> +	if (handler->conf->start_frozen) {
> +		if (!cgroup_freeze(handler))
> +			goto out_abort;
> +		if (lxc_sync_wake_child(handler, LXC_SYNC_POST_START_FROZEN))
> +			goto out_abort;

Not quite sure how I feel about putting sequence numbers under
conditional.  Seems like begging for subtle sequence-out-of-sync
bugs later on.  I think we need to always to the sync points.

> +	}
> +
>  	lxc_sync_fini(handler);
>  
>  	return 0;
> @@ -1351,6 +1356,11 @@ static int start(struct lxc_handler *handler, void* data)
>  {
>  	struct start_args *arg = data;
>  
> +	if (handler->conf->start_frozen) {
> +		NOTICE("freezing '%s'", arg->argv[0]);
> +		lxc_sync_barrier_parent(handler, LXC_SYNC_START_FROZEN);
> +	}
> +
>  	NOTICE("exec'ing '%s'", arg->argv[0]);
>  
>  	execvp(arg->argv[0], arg->argv);
> diff --git a/src/lxc/sync.h b/src/lxc/sync.h
> index 930fcb3..2df2355 100644
> --- a/src/lxc/sync.h
> +++ b/src/lxc/sync.h
> @@ -30,6 +30,8 @@ enum {
>  	LXC_SYNC_POST_CONFIGURE,
>  	LXC_SYNC_CGROUP,
>  	LXC_SYNC_POST_CGROUP,
> +	LXC_SYNC_START_FROZEN,
> +	LXC_SYNC_POST_START_FROZEN,
>  	LXC_SYNC_RESTART,
>  	LXC_SYNC_POST_RESTART,
>  };
> -- 
> 2.1.4
> 
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel


More information about the lxc-devel mailing list