[lxc-devel] [lxc/master] log: fix race

brauner on Github lxc-bot at linuxcontainers.org
Fri Nov 25 16:32:31 UTC 2016


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 2186 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20161125/61a710de/attachment.bin>
-------------- next part --------------
From b1b6ae500a5a97199c79624deb5d2af588cdbd75 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 25 Nov 2016 17:28:18 +0100
Subject: [PATCH] log: fix race

localtime_r() can lead to deadlocks because it calls __tzset() and
__tzconvert() internally. The deadlock stems from an interaction between these
functions and the functions in monitor.c and commands.{c,h}. The latter
functions will write to the log independent of the container thread that is
currently running. Since the monitor fork()ed it seems to duplicate the mutex
states of the time functions mentioned above causing the deadlock.
As a short termm fix, I suggest to simply disable receiving the time when
monitor.c or command.{c,h} functions are called. This should be ok, since the
[lxc monitor] will only emit a few messages and thread-safety is currently more
important than beautiful logs. The rest of the log stays the same as it was
before.

Here is an example output from logs where I printed the pid and tid of the
process that is currently writing to the log:

            lxc 20161125170200.619 INFO     lxc_start:   18695-18695: - start.c:lxc_check_inherited:243 - Closed inherited fd: 23.
            lxc 20161125170200.640 DEBUG    lxc_start:   18677-18677: - start.c:__lxc_start:1334 - Not dropping CAP_SYS_BOOT or watching utmp.
            lxc 20161125170200.640 INFO     lxc_cgroup:  18677-18677: - cgroups/cgroup.c:cgroup_init:68 - cgroup driver cgroupfs-ng initing for lxc-test-concurrent-0

----------> lxc 20150427012246.000 INFO     lxc_monitor: 13017-18622: - monitor.c:lxc_monitor_sock_name:178 - using monitor sock name lxc/ad055575fe28ddd5//var/lib/lxc

            lxc 20161125170200.662 DEBUG    lxc_cgfsng:  18677-18677: - cgroups/cgfsng.c:filter_and_set_cpus:478 - No isolated cpus detected.
            lxc 20161125170200.662 DEBUG    lxc_cgfsng:  18677-18677: - cgroups/cgfsng.c:handle_cpuset_hierarchy:648 - "cgroup.clone_children" was already set to "1".

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/log.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/lxc/log.c b/src/lxc/log.c
index a08cc26..0997fda 100644
--- a/src/lxc/log.c
+++ b/src/lxc/log.c
@@ -45,6 +45,12 @@
 
 #define LXC_LOG_DATEFOMAT_SIZE  15
 
+#if !HAVE_GETTID
+static inline pid_t gettid(void) {
+        return (pid_t) syscall(SYS_gettid);
+}
+#endif
+
 int lxc_log_fd = -1;
 static int syslog_enable = 0;
 int lxc_quiet_specified;
@@ -141,7 +147,7 @@ static int log_append_logfile(const struct lxc_log_appender *appender,
 	char buffer[LXC_LOG_BUFFER_SIZE];
 	struct tm newtime;
 	int n;
-	int ms;
+	int ms = 0;
 	int fd_to_use = -1;
 
 #ifndef NO_LXC_CONF
@@ -155,11 +161,25 @@ static int log_append_logfile(const struct lxc_log_appender *appender,
 	if (fd_to_use == -1)
 		return 0;
 
-	if (!localtime_r(&event->timestamp.tv_sec, &newtime))
-		return 0;
-
-	strftime(date, sizeof(date), "%Y%m%d%H%M%S", &newtime);
-	ms = event->timestamp.tv_usec / 1000;
+	/* localtime_r() can lead to deadlocks because it calls __tzset() and
+	 * __tzconvert() internally. The deadlock stems from an interaction
+	 * between these functions and the functions in monitor.c and
+	 * commands.{c,h}. The latter functions will write to the log
+	 * independent of the container thread that is currently running. Since
+	 * the monitor fork()ed it seems to duplicate the mutex states of the
+	 * time functions mentioned above.
+	 * As a short termm fix, I suggest to simply disable receiving the time
+	 * when monitor.c or command.{c,h} functions are called. This should be
+	 * ok, since the [lxc monitor] will only emit a few messages and
+	 * thread-safety is currently more important than beautiful logs. The
+	 * rest of the log stays the same as it was before.
+	 */
+	if (getpid() == gettid()) {
+		if (!localtime_r(&event->timestamp.tv_sec, &newtime))
+			return 0;
+		strftime(date, sizeof(date), "%Y%m%d%H%M%S", &newtime);
+		ms = event->timestamp.tv_usec / 1000;
+	}
 	n = snprintf(buffer, sizeof(buffer),
 		     "%15s%s%s %10s.%03d %-8s %s - %s:%s:%d - ",
 		     log_prefix,
@@ -176,13 +196,9 @@ static int log_append_logfile(const struct lxc_log_appender *appender,
 		return n;
 
 	if (n < sizeof(buffer) - 1)
-		n += vsnprintf(buffer + n, sizeof(buffer) - n, event->fmt,
-			       *event->vap);
-	else {
-		WARN("truncated next event from %d to %zd bytes", n,
-		     sizeof(buffer));
+		n += vsnprintf(buffer + n, sizeof(buffer) - n, event->fmt, *event->vap);
+	else
 		n = sizeof(buffer) - 1;
-	}
 
 	buffer[n] = '\n';
 


More information about the lxc-devel mailing list