[lxc-devel] [RFC 1/2] lxc-start: added --start-frozen

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Jan 19 14:17:21 UTC 2016


On Mon, Jan 18, 2016 at 11:18:32PM +0000, Serge Hallyn wrote:
> Quoting Wolfgang Bumiller (w.bumiller at proxmox.com):
> > --- a/src/lxc/lxccontainer.h
> > +++ b/src/lxc/lxccontainer.h
> > @@ -245,6 +245,16 @@ struct lxc_container {
> >  	bool (*want_close_all_fds)(struct lxc_container *c, bool state);
> >  
> >  	/*!
> > +	 * \brief Specify whether the container wishes start in a frozen state.
> > +	 *
> > +	 * \param c Container.
> > +	 * \param state Value for the start_frozen bit (0 or 1).
> > +	 *
> > +	 * \return \c true on success, else \c false.
> > +	 */
> > +	bool (*set_start_frozen)(struct lxc_container *c, bool state);
> 
> Hi,
> 
> new functions must go at the end so as not to force linked applications
> to be relinked.

Does this also hold for cgroup_ops? (That would look ugly though...)

> > +
> > +	/*!
> >  	 * \brief Return current config file name.
> >  	 *
> >  	 * \param c Container.
> > diff --git a/src/lxc/start.c b/src/lxc/start.c
> > index 79dbe33..84a05f9 100644
> > --- a/src/lxc/start.c
> > +++ b/src/lxc/start.c
> > @@ -1343,6 +1343,23 @@ static int start(struct lxc_handler *handler, void* data)
> >  {
> >  	struct start_args *arg = data;
> >  
> > +	if (handler->conf->start_frozen) {
> > +		char *cgfile = NULL;
> > +		NOTICE("freezing '%s'", arg->argv[0]);
> > +		/* Let the monitor know we reached this point. */
> > +		lxc_sync_fini_child(handler);
> > +		if (asprintf(&cgfile, "/sys/fs/cgroup/freezer/lxc/%s/freezer.state", handler->name) == -1) {
> > +			ERROR("failed to allocate memory trying to freezed container '%s'", handler->name);
> > +			return -1;
> > +		}
> 
> Hm, so this is done fromthe container's namespaces, and makes
> assumptions about the cgroup layout.  Won't work for unprivileged
> or nested containers (without cgroup namespace).
>
> Depending on how important this is for you, it might be worth
> putting a sync point here to have the parent move the child.
> Except how will the child get to executing init?  Can you use
> ptrace somehow, have the parent step through until the child's
> exec() and then freeze it and release the ptrace?

>From our perspective executing init isn't really an issue as we don't
see a reason to distinguish between the process before and after the
exec() call. Freezing before exec() might even be better since it sort
of guarantees the init process hasn't started touching the container
yet, so I made a variant of the patch with an additional sync point.

I'm attaching a new version inline stripped off doc changes and signoff
since it's most likely not final yet anyway.

I also came across another possible bug/cleanup situation, see the
src/lxc/start.c changes: if the LXC_SYNC_POST_CGROUP barrier fails it
returns rather than doing a `goto out_delete_net` like the calls above
it, also the calls below revert back to `goto out_abort` and should
probably be out_delete_net, too? (probably happend due to patch-contexts
given that git-blame shows many authors for small groups of lines).

New patch:
---
>From 43cc837c1782ef5c47e455922e5e03e6aefee1ee Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller at proxmox.com>
Date: Thu, 14 Jan 2016 11:09:06 +0100
Subject: [PATCH] lxc-start: added --start-frozen

Add the possibility to start a container in a frozen state.

---

diff --git a/src/lxc/arguments.h b/src/lxc/arguments.h
index 898a9ce..066754b 100644
--- a/src/lxc/arguments.h
+++ b/src/lxc/arguments.h
@@ -84,6 +84,9 @@ struct lxc_arguments {
 	/* close fds from parent? */
 	int close_all_fds;
 
+	/* freeze before starting init */
+	int start_frozen;
+
 	/* lxc-create */
 	char *bdevtype, *configfile, *template;
 	char *fstype;
diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c
index da88bb7..ee0b92b 100644
--- a/src/lxc/cgfs.c
+++ b/src/lxc/cgfs.c
@@ -2392,7 +2392,7 @@ out:
 	return ret;
 }
 
-static bool cgfs_unfreeze(void *hdata)
+static bool cgfs_freezer_set(void *hdata, const char *state)
 {
 	struct cgfs_data *d = hdata;
 	char *cgabspath, *cgrelpath;
@@ -2406,11 +2406,21 @@ static bool cgfs_unfreeze(void *hdata)
 	if (!cgabspath)
 		return false;
 
-	ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
+	ret = do_cgroup_set(cgabspath, "freezer.state", state);
 	free(cgabspath);
 	return ret == 0;
 }
 
+static bool cgfs_freeze(void *hdata)
+{
+	return cgfs_freezer_set(hdata, "FROZEN");
+}
+
+static bool cgfs_unfreeze(void *hdata)
+{
+	return cgfs_freezer_set(hdata, "THAWED");
+}
+
 static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
 				  bool with_devices)
 {
@@ -2460,6 +2470,7 @@ static struct cgroup_ops cgfs_ops = {
 	.escape = cgfs_escape,
 	.get = lxc_cgroupfs_get,
 	.set = lxc_cgroupfs_set,
+	.freeze = cgfs_freeze,
 	.unfreeze = cgfs_unfreeze,
 	.setup_limits = cgroupfs_setup_limits,
 	.name = "cgroupfs",
diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
index 614f6e7..f9a7ed6 100644
--- a/src/lxc/cgmanager.c
+++ b/src/lxc/cgmanager.c
@@ -1460,7 +1460,7 @@ struct cgroup_ops *cgm_ops_init(void)
 }
 
 /* unfreeze is called by the command api after killing a container.  */
-static bool cgm_unfreeze(void *hdata)
+static bool cgm_freezer_set(void *hdata, const char *state)
 {
 	struct cgm_data *d = hdata;
 	bool ret = true;
@@ -1473,7 +1473,7 @@ static bool cgm_unfreeze(void *hdata)
 		return false;
 	}
 	if (cgmanager_set_value_sync(NULL, cgroup_manager, "freezer", d->cgroup_path,
-			"freezer.state", "THAWED") != 0) {
+			"freezer.state", state) != 0) {
 		NihError *nerr;
 		nerr = nih_error_get();
 		ERROR("call to cgmanager_set_value_sync failed: %s", nerr->message);
@@ -1485,6 +1485,16 @@ static bool cgm_unfreeze(void *hdata)
 	return ret;
 }
 
+static bool cgm_freeze(void *hdata)
+{
+	return cgm_freezer_set(hdata, "FROZEN");
+}
+
+static bool cgm_unfreeze(void *hdata)
+{
+	return cgm_freezer_set(hdata, "THAWED");
+}
+
 static bool cgm_setup_limits(void *hdata, struct lxc_list *cgroup_settings, bool do_devices)
 {
 	struct cgm_data *d = hdata;
@@ -1666,6 +1676,7 @@ static struct cgroup_ops cgmanager_ops = {
 	.escape = cgm_escape,
 	.get = cgm_get,
 	.set = cgm_set,
+	.freeze = cgm_freeze,
 	.unfreeze = cgm_unfreeze,
 	.setup_limits = cgm_setup_limits,
 	.name = "cgmanager",
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 8954733..380447c 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -136,6 +136,13 @@ bool cgroup_unfreeze(struct lxc_handler *handler)
 	return false;
 }
 
+bool cgroup_freeze(struct lxc_handler *handler)
+{
+	if (ops)
+		return ops->freeze(handler->cgroup_data);
+	return false;
+}
+
 bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
 {
 	if (ops)
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index e3d3ce4..df808d1 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -50,6 +50,7 @@ struct cgroup_ops {
 	bool (*escape)(void);
 	int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath);
 	int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
+	bool (*freeze)(void *hdata);
 	bool (*unfreeze)(void *hdata);
 	bool (*setup_limits)(void *hdata, struct lxc_list *cgroup_conf, bool with_devices);
 	bool (*chown)(void *hdata, struct lxc_conf *conf);
@@ -78,6 +79,7 @@ extern bool cgroup_escape(void);
  * Currently, this call  only makes sense for privileged containers.
  */
 extern const char *cgroup_canonical_path(struct lxc_handler *handler);
+extern bool cgroup_freeze(struct lxc_handler *handler);
 extern bool cgroup_unfreeze(struct lxc_handler *handler);
 extern void cgroup_disconnect(void);
 extern cgroup_driver_t cgroup_driver(void);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index b0274ec..5c84f5f 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -312,6 +312,7 @@ struct lxc_conf {
 	struct lxc_rootfs rootfs;
 	char *ttydir;
 	int close_all_fds;
+	int start_frozen;
 	struct lxc_list hooks[NUM_LXC_HOOKS];
 
 	char *lsm_aa_profile;
diff --git a/src/lxc/lxc_start.c b/src/lxc/lxc_start.c
index 6b942ac..73e27e3 100644
--- a/src/lxc/lxc_start.c
+++ b/src/lxc/lxc_start.c
@@ -53,6 +53,7 @@
 #define OPT_SHARE_NET OPT_USAGE+1
 #define OPT_SHARE_IPC OPT_USAGE+2
 #define OPT_SHARE_UTS OPT_USAGE+3
+#define OPT_START_FROZEN OPT_USAGE+4
 
 lxc_log_define(lxc_start_ui, lxc);
 
@@ -154,6 +155,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg)
 	case OPT_SHARE_NET: args->share_ns[LXC_NS_NET] = arg; break;
 	case OPT_SHARE_IPC: args->share_ns[LXC_NS_IPC] = arg; break;
 	case OPT_SHARE_UTS: args->share_ns[LXC_NS_UTS] = arg; break;
+	case OPT_START_FROZEN: args->start_frozen = 1; break;
 	}
 	return 0;
 }
@@ -170,6 +172,7 @@ static const struct option my_longopts[] = {
 	{"share-net", required_argument, 0, OPT_SHARE_NET},
 	{"share-ipc", required_argument, 0, OPT_SHARE_IPC},
 	{"share-uts", required_argument, 0, OPT_SHARE_UTS},
+	{"start-frozen", no_argument, 0, OPT_START_FROZEN},
 	LXC_COMMON_OPTIONS
 };
 
@@ -193,6 +196,7 @@ Options :\n\
                          Note: --daemon implies --close-all-fds\n\
   -s, --define KEY=VAL   Assign VAL to configuration variable KEY\n\
       --share-[net|ipc|uts]=NAME Share a namespace with another container or pid\n\
+      --start-frozen     Freeze the container before starting its init\n\
 ",
 	.options   = my_longopts,
 	.parser    = my_parser,
@@ -335,6 +339,9 @@ int main(int argc, char *argv[])
 	if (my_args.close_all_fds)
 		c->want_close_all_fds(c, true);
 
+	if (my_args.start_frozen)
+		c->set_start_frozen(c, true);
+
 	if (args == default_args)
 		err = c->start(c, 0, NULL) ? 0 : 1;
 	else
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 490a59d..d1147f6 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -591,6 +591,21 @@ static bool do_lxcapi_want_close_all_fds(struct lxc_container *c, bool state)
 
 WRAP_API_1(bool, lxcapi_want_close_all_fds, bool)
 
+static bool do_lxcapi_set_start_frozen(struct lxc_container *c, bool state)
+{
+	if (!c || !c->lxc_conf)
+		return false;
+	if (container_mem_lock(c)) {
+		ERROR("Error getting mem lock");
+		return false;
+	}
+	c->lxc_conf->start_frozen = state;
+	container_mem_unlock(c);
+	return true;
+}
+
+WRAP_API_1(bool, lxcapi_set_start_frozen, bool)
+
 static bool do_lxcapi_wait(struct lxc_container *c, const char *state, int timeout)
 {
 	int ret;
@@ -4096,6 +4111,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
 	c->load_config = lxcapi_load_config;
 	c->want_daemonize = lxcapi_want_daemonize;
 	c->want_close_all_fds = lxcapi_want_close_all_fds;
+	c->set_start_frozen = lxcapi_set_start_frozen;
 	c->start = lxcapi_start;
 	c->startl = lxcapi_startl;
 	c->stop = lxcapi_stop;
diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h
index 0755901..705eede 100644
--- a/src/lxc/lxccontainer.h
+++ b/src/lxc/lxccontainer.h
@@ -823,6 +823,16 @@ struct lxc_container {
 	 * \return \c 0 on success, nonzero on failure.
 	 */
 	int (*migrate)(struct lxc_container *c, unsigned int cmd, struct migrate_opts *opts, unsigned int size);
+
+	/*!
+	 * \brief Specify whether the container wishes start in a frozen state.
+	 *
+	 * \param c Container.
+	 * \param state Value for the start_frozen bit (0 or 1).
+	 *
+	 * \return \c true on success, else \c false.
+	 */
+	bool (*set_start_frozen)(struct lxc_container *c, bool state);
 };
 
 /*!
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 06bfd4b..d52e1d0 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1156,14 +1156,12 @@ static int lxc_spawn(struct lxc_handler *handler)
 	}
 
 	/* Tell the child to complete its initialization and wait for
-	 * it to exec or return an error.  (the child will never
-	 * return LXC_SYNC_POST_CGROUP+1.  It will either close the
-	 * sync pipe, causing lxc_sync_barrier_child to return
-	 * success, or return a different value, causing us to error
-	 * out).
+	 * it to exec, wait to be frozen or return an error. (on error
+	 * the child sends an unexpected sequence number that is not
+	 * LXC_SYNC_POTS_CGROUP+1.)
 	 */
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
-		return -1;
+		goto out_delete_net;
 
 	if (detect_shared_rootfs())
 		umount2(handler->conf->rootfs.mount, MNT_DETACH);
@@ -1177,6 +1175,13 @@ static int lxc_spawn(struct lxc_handler *handler)
 		goto out_abort;
 	}
 
+	if (handler->conf->start_frozen) {
+		if (!cgroup_freeze(handler))
+			goto out_abort;
+		if (lxc_sync_wake_child(handler, LXC_SYNC_POST_START_FROZEN))
+			goto out_abort;
+	}
+
 	lxc_sync_fini(handler);
 
 	return 0;
@@ -1351,6 +1356,11 @@ static int start(struct lxc_handler *handler, void* data)
 {
 	struct start_args *arg = data;
 
+	if (handler->conf->start_frozen) {
+		NOTICE("freezing '%s'", arg->argv[0]);
+		lxc_sync_barrier_parent(handler, LXC_SYNC_START_FROZEN);
+	}
+
 	NOTICE("exec'ing '%s'", arg->argv[0]);
 
 	execvp(arg->argv[0], arg->argv);
diff --git a/src/lxc/sync.h b/src/lxc/sync.h
index 930fcb3..2df2355 100644
--- a/src/lxc/sync.h
+++ b/src/lxc/sync.h
@@ -30,6 +30,8 @@ enum {
 	LXC_SYNC_POST_CONFIGURE,
 	LXC_SYNC_CGROUP,
 	LXC_SYNC_POST_CGROUP,
+	LXC_SYNC_START_FROZEN,
+	LXC_SYNC_POST_START_FROZEN,
 	LXC_SYNC_RESTART,
 	LXC_SYNC_POST_RESTART,
 };
-- 
2.1.4




More information about the lxc-devel mailing list