[lxc-devel] [RFC 1/2] lxc-start: added --start-frozen
Serge Hallyn
serge.hallyn at ubuntu.com
Tue Jan 19 16:54:59 UTC 2016
Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> On Mon, Jan 18, 2016 at 11:18:32PM +0000, Serge Hallyn wrote:
> > Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> > > --- a/src/lxc/lxccontainer.h
> > > +++ b/src/lxc/lxccontainer.h
> > > @@ -245,6 +245,16 @@ struct lxc_container {
> > > bool (*want_close_all_fds)(struct lxc_container *c, bool state);
> > >
> > > /*!
> > > + * \brief Specify whether the container wishes start in a frozen state.
> > > + *
> > > + * \param c Container.
> > > + * \param state Value for the start_frozen bit (0 or 1).
> > > + *
> > > + * \return \c true on success, else \c false.
> > > + */
> > > + bool (*set_start_frozen)(struct lxc_container *c, bool state);
> >
> > Hi,
> >
> > new functions must go at the end so as not to force linked applications
> > to be relinked.
>
> Does this also hold for cgroup_ops? (That would look ugly though...)
No - the reason it applies for lxc_container is because other software
calls its members. That doesn't happen with cgroup_ops.
> > > +
> > > + /*!
> > > * \brief Return current config file name.
> > > *
> > > * \param c Container.
> > > diff --git a/src/lxc/start.c b/src/lxc/start.c
> > > index 79dbe33..84a05f9 100644
> > > --- a/src/lxc/start.c
> > > +++ b/src/lxc/start.c
> > > @@ -1343,6 +1343,23 @@ static int start(struct lxc_handler *handler, void* data)
> > > {
> > > struct start_args *arg = data;
> > >
> > > + if (handler->conf->start_frozen) {
> > > + char *cgfile = NULL;
> > > + NOTICE("freezing '%s'", arg->argv[0]);
> > > + /* Let the monitor know we reached this point. */
> > > + lxc_sync_fini_child(handler);
> > > + if (asprintf(&cgfile, "/sys/fs/cgroup/freezer/lxc/%s/freezer.state", handler->name) == -1) {
> > > + ERROR("failed to allocate memory trying to freezed container '%s'", handler->name);
> > > + return -1;
> > > + }
> >
> > Hm, so this is done fromthe container's namespaces, and makes
> > assumptions about the cgroup layout. Won't work for unprivileged
> > or nested containers (without cgroup namespace).
> >
> > Depending on how important this is for you, it might be worth
> > putting a sync point here to have the parent move the child.
> > Except how will the child get to executing init? Can you use
> > ptrace somehow, have the parent step through until the child's
> > exec() and then freeze it and release the ptrace?
>
> From our perspective executing init isn't really an issue as we don't
> see a reason to distinguish between the process before and after the
> exec() call. Freezing before exec() might even be better since it sort
What is your goal for this? To strace?
> of guarantees the init process hasn't started touching the container
> yet, so I made a variant of the patch with an additional sync point.
>
> I'm attaching a new version inline stripped off doc changes and signoff
> since it's most likely not final yet anyway.
>
> I also came across another possible bug/cleanup situation, see the
> src/lxc/start.c changes: if the LXC_SYNC_POST_CGROUP barrier fails it
> returns rather than doing a `goto out_delete_net` like the calls above
> it, also the calls below revert back to `goto out_abort` and should
> probably be out_delete_net, too? (probably happend due to patch-contexts
> given that git-blame shows many authors for small groups of lines).
>
> New patch:
> ---
> From 43cc837c1782ef5c47e455922e5e03e6aefee1ee Mon Sep 17 00:00:00 2001
> From: Wolfgang Bumiller <w.bumiller at proxmox.com>
> Date: Thu, 14 Jan 2016 11:09:06 +0100
> Subject: [PATCH] lxc-start: added --start-frozen
>
> Add the possibility to start a container in a frozen state.
>
> ---
>
> diff --git a/src/lxc/arguments.h b/src/lxc/arguments.h
> index 898a9ce..066754b 100644
> --- a/src/lxc/arguments.h
> +++ b/src/lxc/arguments.h
> @@ -84,6 +84,9 @@ struct lxc_arguments {
> /* close fds from parent? */
> int close_all_fds;
>
> + /* freeze before starting init */
> + int start_frozen;
> +
> /* lxc-create */
> char *bdevtype, *configfile, *template;
> char *fstype;
> diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c
> index da88bb7..ee0b92b 100644
> --- a/src/lxc/cgfs.c
> +++ b/src/lxc/cgfs.c
> @@ -2392,7 +2392,7 @@ out:
> return ret;
> }
>
> -static bool cgfs_unfreeze(void *hdata)
> +static bool cgfs_freezer_set(void *hdata, const char *state)
> {
> struct cgfs_data *d = hdata;
> char *cgabspath, *cgrelpath;
> @@ -2406,11 +2406,21 @@ static bool cgfs_unfreeze(void *hdata)
> if (!cgabspath)
> return false;
>
> - ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
> + ret = do_cgroup_set(cgabspath, "freezer.state", state);
> free(cgabspath);
> return ret == 0;
> }
>
> +static bool cgfs_freeze(void *hdata)
> +{
> + return cgfs_freezer_set(hdata, "FROZEN");
> +}
> +
> +static bool cgfs_unfreeze(void *hdata)
> +{
> + return cgfs_freezer_set(hdata, "THAWED");
> +}
> +
> static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
> bool with_devices)
> {
> @@ -2460,6 +2470,7 @@ static struct cgroup_ops cgfs_ops = {
> .escape = cgfs_escape,
> .get = lxc_cgroupfs_get,
> .set = lxc_cgroupfs_set,
> + .freeze = cgfs_freeze,
> .unfreeze = cgfs_unfreeze,
> .setup_limits = cgroupfs_setup_limits,
> .name = "cgroupfs",
> diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
> index 614f6e7..f9a7ed6 100644
> --- a/src/lxc/cgmanager.c
> +++ b/src/lxc/cgmanager.c
> @@ -1460,7 +1460,7 @@ struct cgroup_ops *cgm_ops_init(void)
> }
>
> /* unfreeze is called by the command api after killing a container. */
> -static bool cgm_unfreeze(void *hdata)
> +static bool cgm_freezer_set(void *hdata, const char *state)
> {
> struct cgm_data *d = hdata;
> bool ret = true;
> @@ -1473,7 +1473,7 @@ static bool cgm_unfreeze(void *hdata)
> return false;
> }
> if (cgmanager_set_value_sync(NULL, cgroup_manager, "freezer", d->cgroup_path,
> - "freezer.state", "THAWED") != 0) {
> + "freezer.state", state) != 0) {
> NihError *nerr;
> nerr = nih_error_get();
> ERROR("call to cgmanager_set_value_sync failed: %s", nerr->message);
> @@ -1485,6 +1485,16 @@ static bool cgm_unfreeze(void *hdata)
> return ret;
> }
>
> +static bool cgm_freeze(void *hdata)
> +{
> + return cgm_freezer_set(hdata, "FROZEN");
> +}
> +
> +static bool cgm_unfreeze(void *hdata)
> +{
> + return cgm_freezer_set(hdata, "THAWED");
> +}
> +
> static bool cgm_setup_limits(void *hdata, struct lxc_list *cgroup_settings, bool do_devices)
> {
> struct cgm_data *d = hdata;
> @@ -1666,6 +1676,7 @@ static struct cgroup_ops cgmanager_ops = {
> .escape = cgm_escape,
> .get = cgm_get,
> .set = cgm_set,
> + .freeze = cgm_freeze,
> .unfreeze = cgm_unfreeze,
> .setup_limits = cgm_setup_limits,
> .name = "cgmanager",
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index 8954733..380447c 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -136,6 +136,13 @@ bool cgroup_unfreeze(struct lxc_handler *handler)
> return false;
> }
>
> +bool cgroup_freeze(struct lxc_handler *handler)
> +{
> + if (ops)
> + return ops->freeze(handler->cgroup_data);
> + return false;
> +}
> +
> bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
> {
> if (ops)
> diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
> index e3d3ce4..df808d1 100644
> --- a/src/lxc/cgroup.h
> +++ b/src/lxc/cgroup.h
> @@ -50,6 +50,7 @@ struct cgroup_ops {
> bool (*escape)(void);
> int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath);
> int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
> + bool (*freeze)(void *hdata);
> bool (*unfreeze)(void *hdata);
> bool (*setup_limits)(void *hdata, struct lxc_list *cgroup_conf, bool with_devices);
> bool (*chown)(void *hdata, struct lxc_conf *conf);
> @@ -78,6 +79,7 @@ extern bool cgroup_escape(void);
> * Currently, this call only makes sense for privileged containers.
> */
> extern const char *cgroup_canonical_path(struct lxc_handler *handler);
> +extern bool cgroup_freeze(struct lxc_handler *handler);
> extern bool cgroup_unfreeze(struct lxc_handler *handler);
> extern void cgroup_disconnect(void);
> extern cgroup_driver_t cgroup_driver(void);
> diff --git a/src/lxc/conf.h b/src/lxc/conf.h
> index b0274ec..5c84f5f 100644
> --- a/src/lxc/conf.h
> +++ b/src/lxc/conf.h
> @@ -312,6 +312,7 @@ struct lxc_conf {
> struct lxc_rootfs rootfs;
> char *ttydir;
> int close_all_fds;
> + int start_frozen;
> struct lxc_list hooks[NUM_LXC_HOOKS];
>
> char *lsm_aa_profile;
> diff --git a/src/lxc/lxc_start.c b/src/lxc/lxc_start.c
> index 6b942ac..73e27e3 100644
> --- a/src/lxc/lxc_start.c
> +++ b/src/lxc/lxc_start.c
> @@ -53,6 +53,7 @@
> #define OPT_SHARE_NET OPT_USAGE+1
> #define OPT_SHARE_IPC OPT_USAGE+2
> #define OPT_SHARE_UTS OPT_USAGE+3
> +#define OPT_START_FROZEN OPT_USAGE+4
>
> lxc_log_define(lxc_start_ui, lxc);
>
> @@ -154,6 +155,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg)
> case OPT_SHARE_NET: args->share_ns[LXC_NS_NET] = arg; break;
> case OPT_SHARE_IPC: args->share_ns[LXC_NS_IPC] = arg; break;
> case OPT_SHARE_UTS: args->share_ns[LXC_NS_UTS] = arg; break;
> + case OPT_START_FROZEN: args->start_frozen = 1; break;
> }
> return 0;
> }
> @@ -170,6 +172,7 @@ static const struct option my_longopts[] = {
> {"share-net", required_argument, 0, OPT_SHARE_NET},
> {"share-ipc", required_argument, 0, OPT_SHARE_IPC},
> {"share-uts", required_argument, 0, OPT_SHARE_UTS},
> + {"start-frozen", no_argument, 0, OPT_START_FROZEN},
> LXC_COMMON_OPTIONS
> };
>
> @@ -193,6 +196,7 @@ Options :\n\
> Note: --daemon implies --close-all-fds\n\
> -s, --define KEY=VAL Assign VAL to configuration variable KEY\n\
> --share-[net|ipc|uts]=NAME Share a namespace with another container or pid\n\
> + --start-frozen Freeze the container before starting its init\n\
> ",
> .options = my_longopts,
> .parser = my_parser,
> @@ -335,6 +339,9 @@ int main(int argc, char *argv[])
> if (my_args.close_all_fds)
> c->want_close_all_fds(c, true);
>
> + if (my_args.start_frozen)
> + c->set_start_frozen(c, true);
> +
> if (args == default_args)
> err = c->start(c, 0, NULL) ? 0 : 1;
> else
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 490a59d..d1147f6 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -591,6 +591,21 @@ static bool do_lxcapi_want_close_all_fds(struct lxc_container *c, bool state)
>
> WRAP_API_1(bool, lxcapi_want_close_all_fds, bool)
>
> +static bool do_lxcapi_set_start_frozen(struct lxc_container *c, bool state)
> +{
> + if (!c || !c->lxc_conf)
> + return false;
> + if (container_mem_lock(c)) {
> + ERROR("Error getting mem lock");
> + return false;
> + }
> + c->lxc_conf->start_frozen = state;
> + container_mem_unlock(c);
> + return true;
> +}
> +
> +WRAP_API_1(bool, lxcapi_set_start_frozen, bool)
> +
> static bool do_lxcapi_wait(struct lxc_container *c, const char *state, int timeout)
> {
> int ret;
> @@ -4096,6 +4111,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
> c->load_config = lxcapi_load_config;
> c->want_daemonize = lxcapi_want_daemonize;
> c->want_close_all_fds = lxcapi_want_close_all_fds;
> + c->set_start_frozen = lxcapi_set_start_frozen;
> c->start = lxcapi_start;
> c->startl = lxcapi_startl;
> c->stop = lxcapi_stop;
> diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h
> index 0755901..705eede 100644
> --- a/src/lxc/lxccontainer.h
> +++ b/src/lxc/lxccontainer.h
> @@ -823,6 +823,16 @@ struct lxc_container {
> * \return \c 0 on success, nonzero on failure.
> */
> int (*migrate)(struct lxc_container *c, unsigned int cmd, struct migrate_opts *opts, unsigned int size);
> +
> + /*!
> + * \brief Specify whether the container wishes start in a frozen state.
> + *
> + * \param c Container.
> + * \param state Value for the start_frozen bit (0 or 1).
> + *
> + * \return \c true on success, else \c false.
> + */
> + bool (*set_start_frozen)(struct lxc_container *c, bool state);
> };
>
> /*!
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 06bfd4b..d52e1d0 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -1156,14 +1156,12 @@ static int lxc_spawn(struct lxc_handler *handler)
> }
>
> /* Tell the child to complete its initialization and wait for
> - * it to exec or return an error. (the child will never
> - * return LXC_SYNC_POST_CGROUP+1. It will either close the
> - * sync pipe, causing lxc_sync_barrier_child to return
> - * success, or return a different value, causing us to error
> - * out).
> + * it to exec, wait to be frozen or return an error. (on error
> + * the child sends an unexpected sequence number that is not
> + * LXC_SYNC_POTS_CGROUP+1.)
> */
> if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
> - return -1;
> + goto out_delete_net;
Yup, looks like a bug right now, thanks.
>
> if (detect_shared_rootfs())
> umount2(handler->conf->rootfs.mount, MNT_DETACH);
> @@ -1177,6 +1175,13 @@ static int lxc_spawn(struct lxc_handler *handler)
> goto out_abort;
> }
>
> + if (handler->conf->start_frozen) {
> + if (!cgroup_freeze(handler))
> + goto out_abort;
> + if (lxc_sync_wake_child(handler, LXC_SYNC_POST_START_FROZEN))
> + goto out_abort;
Not quite sure how I feel about putting sequence numbers under
conditional. Seems like begging for subtle sequence-out-of-sync
bugs later on. I think we need to always to the sync points.
> + }
> +
> lxc_sync_fini(handler);
>
> return 0;
> @@ -1351,6 +1356,11 @@ static int start(struct lxc_handler *handler, void* data)
> {
> struct start_args *arg = data;
>
> + if (handler->conf->start_frozen) {
> + NOTICE("freezing '%s'", arg->argv[0]);
> + lxc_sync_barrier_parent(handler, LXC_SYNC_START_FROZEN);
> + }
> +
> NOTICE("exec'ing '%s'", arg->argv[0]);
>
> execvp(arg->argv[0], arg->argv);
> diff --git a/src/lxc/sync.h b/src/lxc/sync.h
> index 930fcb3..2df2355 100644
> --- a/src/lxc/sync.h
> +++ b/src/lxc/sync.h
> @@ -30,6 +30,8 @@ enum {
> LXC_SYNC_POST_CONFIGURE,
> LXC_SYNC_CGROUP,
> LXC_SYNC_POST_CGROUP,
> + LXC_SYNC_START_FROZEN,
> + LXC_SYNC_POST_START_FROZEN,
> LXC_SYNC_RESTART,
> LXC_SYNC_POST_RESTART,
> };
> --
> 2.1.4
>
>
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel
More information about the lxc-devel
mailing list