[lxc-devel] [PATCH 1/1] cgmanager updates

Serge Hallyn serge.hallyn at ubuntu.com
Mon Mar 3 22:39:00 UTC 2014


1. remove the cgm_dbus_disconnected handler.  We're using a proxy
   anyway, and not keeping it around.

2. comment most of the cgm functions to describe when they are called, to
   ease locking review

3. the cgmanager mutex is now held for the duration of a connection, from
   cgm_dbus_connect to cgm_dbus_disconnect.

3b. so remove the mutex lock/unlock from functions which are called during
   container startup with the cgmanager connection already up

4. remove the cgroup_restart().  It's no longer needed since we don't
   daemonize while we have the cgmanager socket open.

5. report errors and return early if cgm_dbus_connect() fails

6. don't keep the cgm connection open after cgm_ops_init.  I'm a bit torn
   on this one as it means that things like lxc-start will always connect
   twice.  But if we kept it open there would be no good answer, given
   threaded API users, on when to drop that initial connection.

7. cgm_unfreeze and nrtasks: grab the dbus connection, as we'll never
   have one open at that point.  (technically I doubt anyone will use
   cgmanager and the utmp helper on the same host :)

8. lxc_spawn: make sure we only disconnect cgroups if they were already
   connected.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/cgmanager.c    | 143 ++++++++++++++++++++++++++++++-------------------
 src/lxc/cgroup.c       |   8 ---
 src/lxc/cgroup.h       |   1 -
 src/lxc/lxccontainer.c |   1 -
 src/lxc/start.c        |   6 +++
 5 files changed, 95 insertions(+), 64 deletions(-)

diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
index 7be94a7..c95b3d0 100644
--- a/src/lxc/cgmanager.c
+++ b/src/lxc/cgmanager.c
@@ -95,14 +95,13 @@ static char **subsystems;
 static DBusConnection *connection;
 
 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
-static void cgm_dbus_disconnected(DBusConnection *connection);
 static bool cgm_dbus_connect(void)
 {
 	DBusError dbus_error;
 	dbus_error_init(&dbus_error);
 
 	lock_mutex(&thread_mutex);
-	connection = nih_dbus_connect(CGMANAGER_DBUS_SOCK, cgm_dbus_disconnected);
+	connection = nih_dbus_connect(CGMANAGER_DBUS_SOCK, NULL);
 	if (!connection) {
 		NihError *nerr;
 		nerr = nih_error_get();
@@ -134,36 +133,20 @@ static bool cgm_dbus_connect(void)
 		ERROR("Error pinging cgroup manager: %s", nerr->message);
 		nih_free(nerr);
 	}
-	unlock_mutex(&thread_mutex);
 	return true;
 }
 
 static void cgm_dbus_disconnect(void)
 {
-	lock_mutex(&thread_mutex);
 	if (cgroup_manager)
 		nih_free(cgroup_manager);
 	cgroup_manager = NULL;
-	dbus_connection_unref(connection);
+	if (connection)
+		dbus_connection_unref(connection);
 	connection = NULL;
 	unlock_mutex(&thread_mutex);
 }
 
-static void cgm_dbus_disconnected(DBusConnection *connection)
-{
-	lock_mutex(&thread_mutex);
-	WARN("Cgroup manager connection was terminated");
-	cgroup_manager = NULL;
-	dbus_connection_unref(connection);
-	connection = NULL;
-	unlock_mutex(&thread_mutex);
-	if (cgm_dbus_connect()) {
-		INFO("New cgroup manager connection was opened");
-	} else {
-		WARN("Cgroup manager unable to re-open connection");
-	}
-}
-
 static int send_creds(int sock, int rpid, int ruid, int rgid)
 {
 	struct msghdr msg = { 0 };
@@ -200,9 +183,11 @@ static int send_creds(int sock, int rpid, int ruid, int rgid)
 	return 0;
 }
 
+/*
+ * Called during container startup.  The cgmanager socket is open
+ */
 static bool lxc_cgmanager_create(const char *controller, const char *cgroup_path, int32_t *existed)
 {
-	lock_mutex(&thread_mutex);
 	if ( cgmanager_create_sync(NULL, cgroup_manager, controller,
 				       cgroup_path, existed) != 0) {
 		NihError *nerr;
@@ -210,11 +195,9 @@ static bool lxc_cgmanager_create(const char *controller, const char *cgroup_path
 		ERROR("call to cgmanager_create_sync failed: %s", nerr->message);
 		nih_free(nerr);
 		ERROR("Failed to create %s:%s", controller, cgroup_path);
-		unlock_mutex(&thread_mutex);
 		return false;
 	}
 
-	unlock_mutex(&thread_mutex);
 	return true;
 }
 
@@ -222,7 +205,6 @@ static bool lxc_cgmanager_escape(void)
 {
 	pid_t me = getpid();
 	int i;
-	lock_mutex(&thread_mutex);
 	for (i = 0; i < nr_subsystems; i++) {
 		if (cgmanager_move_pid_abs_sync(NULL, cgroup_manager,
 					subsystems[i], "/", me) != 0) {
@@ -231,12 +213,10 @@ static bool lxc_cgmanager_escape(void)
 			ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: %s",
 					subsystems[i], nerr->message);
 			nih_free(nerr);
-			unlock_mutex(&thread_mutex);
 			return false;
 		}
 	}
 
-	unlock_mutex(&thread_mutex);
 	return true;
 }
 
@@ -254,7 +234,6 @@ static int do_chown_cgroup(const char *controller, const char *cgroup_path,
 
 	uid_t caller_nsuid = get_ns_uid(origuid);
 
-	lock_mutex(&thread_mutex);
 	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
 		SYSERROR("Error creating socketpair");
 		goto out;
@@ -316,7 +295,6 @@ static int do_chown_cgroup(const char *controller, const char *cgroup_path,
 out:
 	close(sv[0]);
 	close(sv[1]);
-	unlock_mutex(&thread_mutex);
 	if (ret == 1 && *buf == '1')
 		return 0;
 	return -1;
@@ -335,23 +313,26 @@ static int chown_cgroup_wrapper(void *data)
 	return do_chown_cgroup(arg->controller, arg->cgroup_path, arg->origuid);
 }
 
+/*
+ * Called during container startup.  The cgmanager socket is open
+ */
 static bool lxc_cgmanager_chmod(const char *controller,
 		const char *cgroup_path, const char *file, int mode)
 {
-	lock_mutex(&thread_mutex);
 	if (cgmanager_chmod_sync(NULL, cgroup_manager, controller,
 			cgroup_path, file, mode) != 0) {
 		NihError *nerr;
 		nerr = nih_error_get();
 		ERROR("call to cgmanager_chmod_sync failed: %s", nerr->message);
 		nih_free(nerr);
-		unlock_mutex(&thread_mutex);
 		return false;
 	}
-	unlock_mutex(&thread_mutex);
 	return true;
 }
 
+/*
+ * Called during container startup.  The cgmanager socket is open
+ */
 static bool chown_cgroup(const char *controller, const char *cgroup_path,
 			struct lxc_conf *conf)
 {
@@ -384,26 +365,30 @@ static bool chown_cgroup(const char *controller, const char *cgroup_path,
 static void cgm_remove_cgroup(const char *controller, const char *path)
 {
 	int existed;
-	lock_mutex(&thread_mutex);
 	if ( cgmanager_remove_sync(NULL, cgroup_manager, controller,
 				   path, CG_REMOVE_RECURSIVE, &existed) != 0) {
 		NihError *nerr;
 		nerr = nih_error_get();
 		ERROR("call to cgmanager_remove_sync failed: %s", nerr->message);
 		nih_free(nerr);
-		unlock_mutex(&thread_mutex);
 		ERROR("Error removing %s:%s", controller, path);
 	}
-	unlock_mutex(&thread_mutex);
 	if (existed == -1)
 		INFO("cgroup removal attempt: %s:%s did not exist", controller, path);
 }
 
+/*
+ * We are starting up a container.  We open the cgmanager socket, and leave it
+ * open (and mutex held) for the rest of container startup.
+ */
 static void *cgm_init(const char *name)
 {
 	struct cgm_data *d;
 
-	cgm_dbus_connect();
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		return NULL;
+	}
 	d = malloc(sizeof(*d));
 	if (!d)
 		return NULL;
@@ -429,6 +414,10 @@ err1:
 	return NULL;
 }
 
+/*
+ * Called after a failed container startup.  The cgmanager socket was just
+ * closed at end of lxc_spawn.  We need to re-open
+ */
 static void cgm_destroy(void *hdata)
 {
 	struct cgm_data *d = hdata;
@@ -436,7 +425,10 @@ static void cgm_destroy(void *hdata)
 
 	if (!d)
 		return;
-	cgm_dbus_connect();
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		return;
+	}
 	for (i = 0; i < nr_subsystems; i++)
 		cgm_remove_cgroup(subsystems[i], d->cgroup_path);
 
@@ -457,6 +449,9 @@ static inline void cleanup_cgroups(char *path)
 		cgm_remove_cgroup(subsystems[i], path);
 }
 
+/*
+ * Called during container startup.  The cgmanager socket is open
+ */
 static inline bool cgm_create(void *hdata)
 {
 	struct cgm_data *d = hdata;
@@ -527,17 +522,14 @@ next:
 static bool lxc_cgmanager_enter(pid_t pid, const char *controller,
 		const char *cgroup_path)
 {
-	lock_mutex(&thread_mutex);
 	if (cgmanager_move_pid_sync(NULL, cgroup_manager, controller,
 			cgroup_path, pid) != 0) {
 		NihError *nerr;
 		nerr = nih_error_get();
 		ERROR("call to cgmanager_move_pid_sync failed: %s", nerr->message);
 		nih_free(nerr);
-		unlock_mutex(&thread_mutex);
 		return false;
 	}
-	unlock_mutex(&thread_mutex);
 	return true;
 }
 
@@ -552,6 +544,9 @@ static bool do_cgm_enter(pid_t pid, const char *cgroup_path)
 	return true;
 }
 
+/*
+ * called during container startup.  cgmanager socket is open
+ */
 static inline bool cgm_enter(void *hdata, pid_t pid)
 {
 	struct cgm_data *d = hdata;
@@ -570,6 +565,11 @@ static const char *cgm_get_cgroup(void *hdata, const char *subsystem)
 	return d->cgroup_path;
 }
 
+/*
+ * nrtasks is called by the utmp helper by the container monitor.
+ * cgmanager socket was closed after cgroup setup was complete, so we need
+ * to reopen here.
+ */
 static int cgm_get_nrtasks(void *hdata)
 {
 	struct cgm_data *d = hdata;
@@ -579,21 +579,28 @@ static int cgm_get_nrtasks(void *hdata)
 	if (!d || !d->cgroup_path)
 		return false;
 
-	lock_mutex(&thread_mutex);
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		return false;
+	}
 	if (cgmanager_get_tasks_sync(NULL, cgroup_manager, subsystems[0],
 				     d->cgroup_path, &pids, &pids_len) != 0) {
 		NihError *nerr;
 		nerr = nih_error_get();
 		ERROR("call to cgmanager_get_tasks_sync failed: %s", nerr->message);
 		nih_free(nerr);
-		unlock_mutex(&thread_mutex);
+		cgm_dbus_disconnect();
 		return -1;
 	}
-	unlock_mutex(&thread_mutex);
 	nih_free(pids);
+	cgm_dbus_disconnect();
 	return pids_len;
 }
 
+/*
+ * cgm_get is called to get container cgroup settings.  cgmanager is not
+ * connected.
+ */
 static int cgm_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
 {
 	char *result, *controller, *key, *cgroup;
@@ -610,8 +617,11 @@ static int cgm_get(const char *filename, char *value, size_t len, const char *na
 	cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
 	if (!cgroup)
 		return -1;
-	cgm_dbus_connect();
-	lock_mutex(&thread_mutex);
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		free(cgroup);
+		return -1;
+	}
 	if (cgmanager_get_value_sync(NULL, cgroup_manager, controller, cgroup, filename, &result) != 0) {
 		/*
 		 * must consume the nih error
@@ -622,11 +632,9 @@ static int cgm_get(const char *filename, char *value, size_t len, const char *na
 		nerr = nih_error_get();
 		nih_free(nerr);
 		free(cgroup);
-		unlock_mutex(&thread_mutex);
 		cgm_dbus_disconnect();
 		return -1;
 	}
-	unlock_mutex(&thread_mutex);
 	cgm_dbus_disconnect();
 	free(cgroup);
 	newlen = strlen(result);
@@ -653,7 +661,6 @@ static int cgm_do_set(const char *controller, const char *file,
 			 const char *cgroup, const char *value)
 {
 	int ret;
-	lock_mutex(&thread_mutex);
 	ret = cgmanager_set_value_sync(NULL, cgroup_manager, controller,
 				 cgroup, file, value);
 	if (ret != 0) {
@@ -663,10 +670,12 @@ static int cgm_do_set(const char *controller, const char *file,
 		nih_free(nerr);
 		ERROR("Error setting cgroup %s limit %s", file, cgroup);
 	}
-	unlock_mutex(&thread_mutex);
 	return ret;
 }
 
+/*
+ * cgm_set is called to change cgroup settings.  cgmanager is not connected
+ */
 static int cgm_set(const char *filename, const char *value, const char *name, const char *lxcpath)
 {
 	char *controller, *key, *cgroup;
@@ -686,7 +695,11 @@ static int cgm_set(const char *filename, const char *value, const char *name, co
 			controller, lxcpath, name);
 		return -1;
 	}
-	cgm_dbus_connect();
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		free(cgroup);
+		return -1;
+	}
 	ret = cgm_do_set(controller, filename, cgroup, value);
 	cgm_dbus_disconnect();
 	free(cgroup);
@@ -759,6 +772,11 @@ out_free:
 	return false;
 }
 
+/*
+ * called during cgroup.c:cgroup_ops_init(), at startup.  No threads.
+ * We check whether we can talk to cgmanager, escape to root cgroup if
+ * we are root, then close the connection.
+ */
 struct cgroup_ops *cgm_ops_init(void)
 {
 	if (!collect_subsytems())
@@ -767,8 +785,11 @@ struct cgroup_ops *cgm_ops_init(void)
 		goto err1;
 
 	// root;  try to escape to root cgroup
-	if (geteuid() == 0 && !lxc_cgmanager_escape())
+	if (geteuid() == 0 && !lxc_cgmanager_escape()) {
+		cgm_dbus_disconnect();
 		goto err2;
+	}
+	cgm_dbus_disconnect();
 
 	return &cgmanager_ops;
 
@@ -779,6 +800,10 @@ err1:
 	return NULL;
 }
 
+/*
+ * unfreeze is called by the command api after killing a container.
+ * cgmanager is not connected.
+ */
 static bool cgm_unfreeze(void *hdata)
 {
 	struct cgm_data *d = hdata;
@@ -786,7 +811,10 @@ static bool cgm_unfreeze(void *hdata)
 	if (!d || !d->cgroup_path)
 		return false;
 
-	lock_mutex(&thread_mutex);
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		return false;
+	}
 	if (cgmanager_set_value_sync(NULL, cgroup_manager, "freezer", d->cgroup_path,
 			"freezer.state", "THAWED") != 0) {
 		NihError *nerr;
@@ -794,13 +822,17 @@ static bool cgm_unfreeze(void *hdata)
 		ERROR("call to cgmanager_set_value_sync failed: %s", nerr->message);
 		nih_free(nerr);
 		ERROR("Error unfreezing %s", d->cgroup_path);
-		unlock_mutex(&thread_mutex);
+		cgm_dbus_disconnect();
 		return false;
 	}
-	unlock_mutex(&thread_mutex);
+	cgm_dbus_disconnect();
 	return true;
 }
 
+/*
+ * setup_limits is called during startup.  cgmanager is connected, and mutex
+ * is held
+ */
 static bool cgm_setup_limits(void *hdata, struct lxc_list *cgroup_settings, bool do_devices)
 {
 	struct cgm_data *d = hdata;
@@ -888,7 +920,10 @@ static bool cgm_attach(const char *name, const char *lxcpath, pid_t pid)
 		goto out;
 	}
 
-	cgm_dbus_connect();
+	if (!cgm_dbus_connect()) {
+		ERROR("Error connecting to cgroup manager");
+		goto out;
+	}
 	if (!(pass = do_cgm_enter(pid, cgroup)))
 		ERROR("Failed to enter group %s", cgroup);
 	cgm_dbus_disconnect();
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 23c8c96..63a2ad4 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -168,14 +168,6 @@ int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *na
 	return -1;
 }
 
-void restart_cgroups(void)
-{
-	if (ops && ops->disconnect)
-		ops->disconnect();
-	ops = NULL;
-	cgroup_ops_init();
-}
-
 void cgroup_disconnect(void)
 {
 	if (ops && ops->disconnect)
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index 8cba030..3e222a8 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -65,7 +65,6 @@ extern bool cgroup_create_legacy(struct lxc_handler *handler);
 extern int cgroup_nrtasks(struct lxc_handler *handler);
 extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem);
 extern bool cgroup_unfreeze(struct lxc_handler *handler);
-extern void restart_cgroups(void);
 extern void cgroup_disconnect(void);
 
 #endif
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index b0ae44b..b7adebe 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -637,7 +637,6 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
 		open("/dev/null", O_RDWR);
 		open("/dev/null", O_RDWR);
 		setsid();
-		restart_cgroups();
 	} else {
 		if (!am_single_threaded()) {
 			ERROR("Cannot start non-daemonized container when threaded");
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 97c8207..eb1c659 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -773,6 +773,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 {
 	int failed_before_rename = 0;
 	const char *name = handler->name;
+	bool cgroups_connected = false;
 	int saved_ns_fd[LXC_NS_MAX];
 	int preserve_mask = 0, i;
 	int netpipepair[2], nveths;
@@ -842,6 +843,8 @@ static int lxc_spawn(struct lxc_handler *handler)
 		goto out_delete_net;
 	}
 
+	cgroups_connected = true;
+
 	if (!cgroup_create(handler)) {
 		ERROR("failed creating cgroups");
 		goto out_delete_net;
@@ -953,6 +956,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 	}
 
 	cgroup_disconnect();
+	cgroups_connected = false;
 
 	/* Tell the child to complete its initialization and wait for
 	 * it to exec or return an error.  (the child will never
@@ -981,6 +985,8 @@ static int lxc_spawn(struct lxc_handler *handler)
 	return 0;
 
 out_delete_net:
+	if (cgroups_connected)
+		cgroup_disconnect();
 	if (handler->clone_flags & CLONE_NEWNET)
 		lxc_delete_network(handler);
 out_abort:
-- 
1.9.0



More information about the lxc-devel mailing list