[lxc-devel] [lxc/master] utils: use clone() in run_command()

brauner on Github lxc-bot at linuxcontainers.org
Thu Dec 14 01:45:06 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 2246 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20171214/79eca37e/attachment.bin>
-------------- next part --------------
From 3b52c88ce5ba62013dd079e28003703028a9965f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 14 Dec 2017 02:37:04 +0100
Subject: [PATCH] utils: use clone() in run_command()

run_command() is called in contexts where threads explicitly hold a lock while
fork()ing. Currently, this just affects the legacy cgmanager cgroup driver.
Here's what's happening when we use fork():

1. cgm_chown() calls cgm_dbus_connect()
2. cgm_dbus_connect() calls cgm_lock():
   Now the thread holds an explicit lock on the mutex
3. cgm_chown() calls chown_cgroup()
4. chown_cgroup() calls userns_exec_1()
5. userns_exec_1() forks with an explicit lock on the mutex being held
6. pthread_atfork() handlers get run including the prepare() handler:

        #ifdef HAVE_PTHREAD_ATFORK __attribute__((constructor))
        static void process_lock_setup_atfork(void)
        {
                pthread_atfork(cgm_lock, cgm_unlock, cgm_unlock);
        }
        #endif

   thus trying to acquire the mutex that is being explicitly held in the
   parent. If we were using recursive locks then the parent would now
   hold two locks but since I don't see us using them I guess we're
   simply getting undefined behavior.

There are multiple ways to solve this problem. They are all not very nice. One
solution is to use interposition wrapper for pthread_atfork() but that is
rather tricky since we need to have wrappers for the pthread_atfork() callbacks
and need to identify our caller so that we can make a decision whether we
should execute the callback or not. If this were a generic problem I'd say we
go for this solution but as this only affects the legacy cgmanager driver we
don't really care and I'd much rather enforce that any future code does not
take an explicit lock during a fork(). That sounds like a bad idea in the first
place. So simply switch from using fork() to clone() which does not run
pthread_atfork() handlers. If push comes to shove we might just go for doing
the clone() syscall directly via syscall(SYS_clone, ...).

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/utils.c | 71 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index a82a2f494..f797a7619 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -2215,11 +2215,42 @@ int lxc_unstack_mountpoint(const char *path, bool lazy)
 	return umounts;
 }
 
+struct run_command_args {
+	int (*child_fn)(void *);
+	void *args;
+	int pipefd[2];
+};
+
+static int run_command_callback(void *data)
+{
+	int ret;
+	struct run_command_args *args = data;
+
+	/* Close the read-end of the pipe. */
+	close(args->pipefd[0]);
+
+	/* Redirect std{err,out} to write-end of the pipe. */
+	ret = dup2(args->pipefd[1], STDOUT_FILENO);
+	if (ret >= 0)
+		ret = dup2(args->pipefd[1], STDERR_FILENO);
+
+	/* Close the write-end of the pipe. */
+	close(args->pipefd[1]);
+
+	if (ret < 0) {
+		SYSERROR("Failed to duplicate std{err,out} file descriptor");
+		return -1;
+	}
+
+	return args->child_fn(args->args);
+}
+
 int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
 {
 	pid_t child;
-	int ret, fret, pipefd[2];
+	int ret, pipefd[2];
 	ssize_t bytes;
+	struct run_command_args data;
 
 	/* Make sure our callers do not receive unitialized memory. */
 	if (buf_size > 0 && buf)
@@ -2230,39 +2261,19 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
 		return -1;
 	}
 
-	child = fork();
+	data.args = args;
+	data.pipefd[0] = pipefd[0];
+	data.pipefd[1] = pipefd[1];
+	data.child_fn = child_fn;
+
+	child = lxc_clone(run_command_callback, &data, 0);
 	if (child < 0) {
 		close(pipefd[0]);
 		close(pipefd[1]);
-		SYSERROR("failed to create new process");
+		SYSERROR("Failed to create new process");
 		return -1;
 	}
 
-	if (child == 0) {
-		/* Close the read-end of the pipe. */
-		close(pipefd[0]);
-
-		/* Redirect std{err,out} to write-end of the
-		 * pipe.
-		 */
-		ret = dup2(pipefd[1], STDOUT_FILENO);
-		if (ret >= 0)
-			ret = dup2(pipefd[1], STDERR_FILENO);
-
-		/* Close the write-end of the pipe. */
-		close(pipefd[1]);
-
-		if (ret < 0) {
-			SYSERROR("failed to duplicate std{err,out} file descriptor");
-			exit(EXIT_FAILURE);
-		}
-
-		/* Does not return. */
-		child_fn(args);
-		ERROR("failed to exec command");
-		exit(EXIT_FAILURE);
-	}
-
 	/* close the write-end of the pipe */
 	close(pipefd[1]);
 
@@ -2272,11 +2283,11 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
 			buf[bytes - 1] = '\0';
 	}
 
-	fret = wait_for_pid(child);
+	ret = wait_for_pid(child);
 	/* close the read-end of the pipe */
 	close(pipefd[0]);
 
-	return fret;
+	return ret;
 }
 
 char *must_make_path(const char *first, ...)


More information about the lxc-devel mailing list