[lxc-devel] [PATCH] make monitor/monitord more resilient to unexpected termination

Dwight Engen dwight.engen at oracle.com
Wed Apr 2 17:12:38 UTC 2014


Reported-by: Florian Klink <flokli at flokli.de>
Signed-off-by: Dwight Engen <dwight.engen at oracle.com>
---
 src/lxc/lxc_monitord.c | 20 ++++++++++++++++----
 src/lxc/monitor.c      | 12 +++++++++---
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/lxc/lxc_monitord.c b/src/lxc/lxc_monitord.c
index f6d99d5..8f7e6b0 100644
--- a/src/lxc/lxc_monitord.c
+++ b/src/lxc/lxc_monitord.c
@@ -75,6 +75,7 @@ static int quit;
 
 static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
 {
+	struct flock lk;
 	char fifo_path[PATH_MAX];
 	int ret;
 
@@ -83,8 +84,8 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
 		return ret;
 
 	ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0);
-	if (ret < 0) {
-		INFO("monitor fifo %s exists, already running?", fifo_path);
+	if (ret < 0 && errno != EEXIST) {
+		INFO("failed to mknod monitor fifo %s %s", fifo_path, strerror(errno));
 		return -1;
 	}
 
@@ -94,6 +95,17 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
 		ERROR("failed to open monitor fifo");
 		return -1;
 	}
+
+	lk.l_type = F_WRLCK;
+	lk.l_whence = SEEK_SET;
+	lk.l_start = 0;
+	lk.l_len = 0;
+	if (fcntl(mon->fifofd, F_SETLK, &lk) != 0) {
+		/* another lxc-monitord is already running, don't start up */
+		DEBUG("lxc-monitord already running on lxcpath %s", mon->lxcpath);
+		close(mon->fifofd);
+		return -1;
+	}
 	return 0;
 }
 
@@ -264,8 +276,8 @@ static void lxc_monitord_delete(struct lxc_monitor *mon)
 	lxc_monitord_sock_delete(mon);
 
 	lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
-	close(mon->fifofd);
 	lxc_monitord_fifo_delete(mon);
+	close(mon->fifofd);
 
 	for (i = 0; i < mon->clientfds_cnt; i++) {
 		lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
@@ -401,7 +413,7 @@ int main(int argc, char *argv[])
 		goto out;
 	}
 
-	NOTICE("monitoring lxcpath %s", mon.lxcpath);
+	NOTICE("pid:%d monitoring lxcpath %s", getpid(), mon.lxcpath);
 	for(;;) {
 		ret = lxc_mainloop(&mon.descr, 1000 * 30);
 		if (mon.clientfds_cnt <= 0)
diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c
index e45b5cf..4ca4000 100644
--- a/src/lxc/monitor.c
+++ b/src/lxc/monitor.c
@@ -95,14 +95,20 @@ static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath)
 	if (ret < 0)
 		return;
 
-	fd = open(fifo_path, O_WRONLY);
+	/* Open the fifo with O_NONBLOCK: if the monitor is dead, a blocking
+	 * open would wait forever for a reader that may never come.
+	 */
+	fd = open(fifo_path, O_WRONLY|O_NONBLOCK);
 	if (fd < 0) {
-		/* it is normal for this open to fail when there is no monitor
-		 * running, so we don't log it
+		/* it is normal for this open to fail with ENXIO when there is
+		 * no monitor running, so we don't log it
 		 */
 		return;
 	}
 
+	if (fcntl(fd, F_SETFL, O_WRONLY) < 0)
+		return;
+
 	ret = write(fd, msg, sizeof(*msg));
 	if (ret != sizeof(*msg)) {
 		close(fd);
-- 
1.8.5.3



More information about the lxc-devel mailing list