[lxc-devel] [PATCH] make monitor/monitord more resilient to unexpected termination

Serge Hallyn serge.hallyn at ubuntu.com
Wed Apr 2 17:29:32 UTC 2014


Quoting Dwight Engen (dwight.engen at oracle.com):
> Reported-by: Florian Klink <flokli at flokli.de>
> Signed-off-by: Dwight Engen <dwight.engen at oracle.com>

Great - thanks!

Acked-by: Serge E. Hallyn <serge.hallyn at ubuntu.com>

(If the F_SETLK fails you don't check the errno, but the only one I'd
expect besides EACCES/EAGAIN (lock already held) is ENOLCK, for which
we'd only have a different debug statement anyway...)

> ---
>  src/lxc/lxc_monitord.c | 20 ++++++++++++++++----
>  src/lxc/monitor.c      | 12 +++++++++---
>  2 files changed, 25 insertions(+), 7 deletions(-)
> 
> diff --git a/src/lxc/lxc_monitord.c b/src/lxc/lxc_monitord.c
> index f6d99d5..8f7e6b0 100644
> --- a/src/lxc/lxc_monitord.c
> +++ b/src/lxc/lxc_monitord.c
> @@ -75,6 +75,7 @@ static int quit;
>  
>  static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
>  {
> +	struct flock lk;
>  	char fifo_path[PATH_MAX];
>  	int ret;
>  
> @@ -83,8 +84,8 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
>  		return ret;
>  
>  	ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0);
> -	if (ret < 0) {
> -		INFO("monitor fifo %s exists, already running?", fifo_path);
> +	if (ret < 0 && errno != EEXIST) {
> +		INFO("failed to mknod monitor fifo %s %s", fifo_path, strerror(errno));
>  		return -1;
>  	}
>  
> @@ -94,6 +95,17 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
>  		ERROR("failed to open monitor fifo");
>  		return -1;
>  	}
> +
> +	lk.l_type = F_WRLCK;
> +	lk.l_whence = SEEK_SET;
> +	lk.l_start = 0;
> +	lk.l_len = 0;
> +	if (fcntl(mon->fifofd, F_SETLK, &lk) != 0) {
> +		/* another lxc-monitord is already running, don't start up */
> +		DEBUG("lxc-monitord already running on lxcpath %s", mon->lxcpath);
> +		close(mon->fifofd);
> +		return -1;
> +	}
>  	return 0;
>  }
>  
> @@ -264,8 +276,8 @@ static void lxc_monitord_delete(struct lxc_monitor *mon)
>  	lxc_monitord_sock_delete(mon);
>  
>  	lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
> -	close(mon->fifofd);
>  	lxc_monitord_fifo_delete(mon);
> +	close(mon->fifofd);
>  
>  	for (i = 0; i < mon->clientfds_cnt; i++) {
>  		lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
> @@ -401,7 +413,7 @@ int main(int argc, char *argv[])
>  		goto out;
>  	}
>  
> -	NOTICE("monitoring lxcpath %s", mon.lxcpath);
> +	NOTICE("pid:%d monitoring lxcpath %s", getpid(), mon.lxcpath);
>  	for(;;) {
>  		ret = lxc_mainloop(&mon.descr, 1000 * 30);
>  		if (mon.clientfds_cnt <= 0)
> diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c
> index e45b5cf..4ca4000 100644
> --- a/src/lxc/monitor.c
> +++ b/src/lxc/monitor.c
> @@ -95,14 +95,20 @@ static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath)
>  	if (ret < 0)
>  		return;
>  
> -	fd = open(fifo_path, O_WRONLY);
> +	/* open the fifo nonblock in case the monitor is dead, we don't want
> +	 * the open to wait for a reader since it may never come.
> +	 */
> +	fd = open(fifo_path, O_WRONLY|O_NONBLOCK);
>  	if (fd < 0) {
> -		/* it is normal for this open to fail when there is no monitor
> -		 * running, so we don't log it
> +		/* it is normal for this open to fail ENXIO when there is no
> +		 * monitor running, so we don't log it
>  		 */
>  		return;
>  	}
>  
> +	if (fcntl(fd, F_SETFL, O_WRONLY) < 0)
> +		return;
> +
>  	ret = write(fd, msg, sizeof(*msg));
>  	if (ret != sizeof(*msg)) {
>  		close(fd);
> -- 
> 1.8.5.3
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel


More information about the lxc-devel mailing list