[lxc-devel] [PATCH] make monitor/monitord more resilient to unexpected termination
Serge Hallyn
serge.hallyn at ubuntu.com
Wed Apr 2 17:29:32 UTC 2014
Quoting Dwight Engen (dwight.engen at oracle.com):
> Reported-by: Florian Klink <flokli at flokli.de>
> Signed-off-by: Dwight Engen <dwight.engen at oracle.com>
Great - thanks!
Acked-by: Serge E. Hallyn <serge.hallyn at ubuntu.com>
(If the F_SETLK fails you don't check errno. Besides the lock-already-held
case (EACCES/EAGAIN), the only other error I'd expect is ENOLCK, and for
that we'd merely print a different debug statement...)
> ---
> src/lxc/lxc_monitord.c | 20 ++++++++++++++++----
> src/lxc/monitor.c | 12 +++++++++---
> 2 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/src/lxc/lxc_monitord.c b/src/lxc/lxc_monitord.c
> index f6d99d5..8f7e6b0 100644
> --- a/src/lxc/lxc_monitord.c
> +++ b/src/lxc/lxc_monitord.c
> @@ -75,6 +75,7 @@ static int quit;
>
> static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
> {
> + struct flock lk;
> char fifo_path[PATH_MAX];
> int ret;
>
> @@ -83,8 +84,8 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
> return ret;
>
> ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0);
> - if (ret < 0) {
> - INFO("monitor fifo %s exists, already running?", fifo_path);
> + if (ret < 0 && errno != EEXIST) {
> + INFO("failed to mknod monitor fifo %s %s", fifo_path, strerror(errno));
> return -1;
> }
>
> @@ -94,6 +95,17 @@ static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
> ERROR("failed to open monitor fifo");
> return -1;
> }
> +
> + lk.l_type = F_WRLCK;
> + lk.l_whence = SEEK_SET;
> + lk.l_start = 0;
> + lk.l_len = 0;
> + if (fcntl(mon->fifofd, F_SETLK, &lk) != 0) {
> + /* another lxc-monitord is already running, don't start up */
> + DEBUG("lxc-monitord already running on lxcpath %s", mon->lxcpath);
> + close(mon->fifofd);
> + return -1;
> + }
> return 0;
> }
>
> @@ -264,8 +276,8 @@ static void lxc_monitord_delete(struct lxc_monitor *mon)
> lxc_monitord_sock_delete(mon);
>
> lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
> - close(mon->fifofd);
> lxc_monitord_fifo_delete(mon);
> + close(mon->fifofd);
>
> for (i = 0; i < mon->clientfds_cnt; i++) {
> lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
> @@ -401,7 +413,7 @@ int main(int argc, char *argv[])
> goto out;
> }
>
> - NOTICE("monitoring lxcpath %s", mon.lxcpath);
> + NOTICE("pid:%d monitoring lxcpath %s", getpid(), mon.lxcpath);
> for(;;) {
> ret = lxc_mainloop(&mon.descr, 1000 * 30);
> if (mon.clientfds_cnt <= 0)
> diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c
> index e45b5cf..4ca4000 100644
> --- a/src/lxc/monitor.c
> +++ b/src/lxc/monitor.c
> @@ -95,14 +95,20 @@ static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath)
> if (ret < 0)
> return;
>
> - fd = open(fifo_path, O_WRONLY);
> + /* open the fifo nonblock in case the monitor is dead, we don't want
> + * the open to wait for a reader since it may never come.
> + */
> + fd = open(fifo_path, O_WRONLY|O_NONBLOCK);
> if (fd < 0) {
> - /* it is normal for this open to fail when there is no monitor
> - * running, so we don't log it
> + /* it is normal for this open to fail ENXIO when there is no
> + * monitor running, so we don't log it
> */
> return;
> }
>
> + if (fcntl(fd, F_SETFL, O_WRONLY) < 0)
> + return;
> +
> ret = write(fd, msg, sizeof(*msg));
> if (ret != sizeof(*msg)) {
> close(fd);
> --
> 1.8.5.3
>
> _______________________________________________
> lxc-devel mailing list
> lxc-devel at lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel
More information about the lxc-devel mailing list