[lxc-devel] [PATCH] use poll instead of select

Serge Hallyn serge.hallyn at ubuntu.com
Tue Apr 21 20:19:43 UTC 2015


Particularly when using the go-lxc api with lots of threads, it
happens that if the open files limit is > 1024, we will try to
select on fd > 1024 which breaks on glibc.

So use poll instead of select.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/cgmanager.c   | 24 ++++++++++++++----------
 src/lxc/lxc.h         | 38 --------------------------------------
 src/lxc/lxc_monitor.c | 29 ++++++++++++++---------------
 src/lxc/monitor.c     | 29 +++++++++++++----------------
 src/lxc/monitor.h     | 42 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
index fe8913e..c8d0745 100644
--- a/src/lxc/cgmanager.c
+++ b/src/lxc/cgmanager.c
@@ -39,6 +39,7 @@
 #include <sys/mount.h>
 #include <netinet/in.h>
 #include <net/if.h>
+#include <poll.h>
 
 #include "error.h"
 #include "commands.h"
@@ -347,6 +348,7 @@ static int do_chown_cgroup(const char *controller, const char *cgroup_path,
 {
 	int sv[2] = {-1, -1}, optval = 1, ret = -1;
 	char buf[1];
+	struct pollfd fds;
 
 	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
 		SYSERROR("Error creating socketpair");
@@ -370,10 +372,10 @@ static int do_chown_cgroup(const char *controller, const char *cgroup_path,
 	}
 	/* now send credentials */
 
-	fd_set rfds;
-	FD_ZERO(&rfds);
-	FD_SET(sv[0], &rfds);
-	if (select(sv[0]+1, &rfds, NULL, NULL, NULL) < 0) {
+	fds.fd = sv[0];
+	fds.events = POLLIN;
+	fds.revents = 0;
+	if (poll(&fds, 1, -1) <= 0) {
 		ERROR("Error getting go-ahead from server: %s", strerror(errno));
 		goto out;
 	}
@@ -385,9 +387,10 @@ static int do_chown_cgroup(const char *controller, const char *cgroup_path,
 		SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__);
 		goto out;
 	}
-	FD_ZERO(&rfds);
-	FD_SET(sv[0], &rfds);
-	if (select(sv[0]+1, &rfds, NULL, NULL, NULL) < 0) {
+	fds.fd = sv[0];
+	fds.events = POLLIN;
+	fds.revents = 0;
+	if (poll(&fds, 1, -1) <= 0) {
 		ERROR("Error getting go-ahead from server: %s", strerror(errno));
 		goto out;
 	}
@@ -399,9 +402,10 @@ static int do_chown_cgroup(const char *controller, const char *cgroup_path,
 		SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__);
 		goto out;
 	}
-	FD_ZERO(&rfds);
-	FD_SET(sv[0], &rfds);
-	if (select(sv[0]+1, &rfds, NULL, NULL, NULL) < 0) {
+	fds.fd = sv[0];
+	fds.events = POLLIN;
+	fds.revents = 0;
+	if (poll(&fds, 1, -1) <= 0) {
 		ERROR("Error getting go-ahead from server: %s", strerror(errno));
 		goto out;
 	}
diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
index dd1048c..3da4ff6 100644
--- a/src/lxc/lxc.h
+++ b/src/lxc/lxc.h
@@ -64,44 +64,6 @@ extern int lxc_execute(const char *name, char *const argv[], int quiet,
 		       struct lxc_conf *conf, const char *lxcpath);
 
 /*
- * Open the monitoring mechanism for a specific container
- * The function will return an fd corresponding to the events
- * Returns a file descriptor on success, < 0 otherwise
- */
-extern int lxc_monitor_open(const char *lxcpath);
-
-/*
- * Blocking read for the next container state change
- * @fd  : the file descriptor provided by lxc_monitor_open
- * @msg : the variable which will be filled with the state
- * Returns 0 if the monitored container has exited, > 0 if
- * data was read, < 0 otherwise
- */
-extern int lxc_monitor_read(int fd, struct lxc_msg *msg);
-
-/*
- * Blocking read for the next container state change with timeout
- * @fd      : the file descriptor provided by lxc_monitor_open
- * @msg     : the variable which will be filled with the state
- * @timeout : the timeout in seconds to wait for a state change
- * Returns 0 if the monitored container has exited, > 0 if
- * data was read, < 0 otherwise
- */
-extern int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout);
-
-/*
- * Blocking read from multiple monitors for the next container state
- * change with timeout
- * @rfds    : an fd_set of file descriptors provided by lxc_monitor_open
- * @nfds    : the maximum fd number in rfds + 1
- * @msg     : the variable which will be filled with the state
- * @timeout : the timeout in seconds to wait for a state change
- * Returns 0 if the monitored container has exited, > 0 if
- * data was read, < 0 otherwise
- */
-extern int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg, int timeout);
-
-/*
  * Close the fd associated with the monitoring
  * @fd : the file descriptor provided by lxc_monitor_open
  * Returns 0 on success, < 0 otherwise
diff --git a/src/lxc/lxc_monitor.c b/src/lxc/lxc_monitor.c
index ede19ff..b3d8912 100644
--- a/src/lxc/lxc_monitor.c
+++ b/src/lxc/lxc_monitor.c
@@ -29,6 +29,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <errno.h>
+#include <poll.h>
 
 #include "lxc.h"
 #include "log.h"
@@ -75,8 +76,9 @@ int main(int argc, char *argv[])
 	char *regexp;
 	struct lxc_msg msg;
 	regex_t preg;
-	fd_set rfds, rfds_save;
-	int len, rc, i, nfds = -1;
+	struct pollfd *fds;
+	nfds_t nfds;
+	int len, rc, i;
 
 	if (lxc_arguments_parse(&my_args, argc, argv))
 		return 1;
@@ -131,13 +133,14 @@ int main(int argc, char *argv[])
 	}
 	free(regexp);
 
-	if (my_args.lxcpath_cnt > FD_SETSIZE) {
-		ERROR("too many paths requested, only the first %d will be monitored", FD_SETSIZE);
-		my_args.lxcpath_cnt = FD_SETSIZE;
+	fds = malloc(my_args.lxcpath_cnt * sizeof(struct pollfd));
+	if (!fds) {
+		SYSERROR("out of memory");
+		return -1;
 	}
 
-	FD_ZERO(&rfds);
-	for (i = 0; i < my_args.lxcpath_cnt; i++) {
+	nfds = my_args.lxcpath_cnt;
+	for (i = 0; i < nfds; i++) {
 		int fd;
 
 		lxc_monitord_spawn(my_args.lxcpath[i]);
@@ -147,19 +150,15 @@ int main(int argc, char *argv[])
 			regfree(&preg);
 			return 1;
 		}
-		FD_SET(fd, &rfds);
-		if (fd > nfds)
-			nfds = fd;
+		fds[i].fd = fd;
+		fds[i].events = POLLIN;
+		fds[i].revents = 0;
 	}
-	memcpy(&rfds_save, &rfds, sizeof(rfds_save));
-	nfds++;
 
 	setlinebuf(stdout);
 
 	for (;;) {
-		memcpy(&rfds, &rfds_save, sizeof(rfds));
-
-		if (lxc_monitor_read_fdset(&rfds, nfds, &msg, -1) < 0) {
+		if (lxc_monitor_read_fdset(fds, nfds, &msg, -1) < 0) {
 			regfree(&preg);
 			return 1;
 		}
diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c
index 1e1c094..0e56f75 100644
--- a/src/lxc/monitor.c
+++ b/src/lxc/monitor.c
@@ -38,6 +38,7 @@
 #include <sys/wait.h>
 #include <netinet/in.h>
 #include <net/if.h>
+#include <poll.h>
 
 #include "error.h"
 #include "af_unix.h"
@@ -219,19 +220,13 @@ err1:
 	return ret;
 }
 
-int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg,
+int lxc_monitor_read_fdset(struct pollfd *fds, nfds_t nfds, struct lxc_msg *msg,
 			   int timeout)
 {
-	struct timeval tval,*tv = NULL;
-	int ret,i;
-
-	if (timeout != -1) {
-		tv = &tval;
-		tv->tv_sec = timeout;
-		tv->tv_usec = 0;
-	}
+	long i;
+	int ret;
 
-	ret = select(nfds, rfds, NULL, NULL, tv);
+	ret = poll(fds, nfds, timeout * 1000);
 	if (ret == -1)
 		return -1;
 	else if (ret == 0)
@@ -241,8 +236,9 @@ int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg,
 	 * for when this routine is called again
 	 */
 	for (i = 0; i < nfds; i++) {
-		if (FD_ISSET(i, rfds)) {
-			ret = recv(i, msg, sizeof(*msg), 0);
+		if (fds[i].revents != 0) {
+			fds[i].revents = 0;
+			ret = recv(fds[i].fd, msg, sizeof(*msg), 0);
 			if (ret <= 0) {
 				SYSERROR("client failed to recv (monitord died?) %s",
 					 strerror(errno));
@@ -257,12 +253,13 @@ int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg,
 
 int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout)
 {
-	fd_set rfds;
+	struct pollfd fds;
 
-	FD_ZERO(&rfds);
-	FD_SET(fd, &rfds);
+	fds.fd = fd;
+	fds.events = POLLIN | POLLPRI;
+	fds.revents = 0;
 
-	return lxc_monitor_read_fdset(&rfds, fd+1, msg, timeout);
+	return lxc_monitor_read_fdset(&fds, 1, msg, timeout);
 }
 
 int lxc_monitor_read(int fd, struct lxc_msg *msg)
diff --git a/src/lxc/monitor.h b/src/lxc/monitor.h
index 500a9f2..7a350d8 100644
--- a/src/lxc/monitor.h
+++ b/src/lxc/monitor.h
@@ -26,6 +26,7 @@
 #include <limits.h>
 #include <sys/param.h>
 #include <sys/un.h>
+#include <poll.h>
 
 #include "conf.h"
 
@@ -41,7 +42,6 @@ struct lxc_msg {
 	int value;
 };
 
-extern int lxc_monitor_open(const char *lxcpath);
 extern int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr);
 extern int lxc_monitor_fifo_name(const char *lxcpath, char *fifo_path,
 				 size_t fifo_path_sz, int do_mkdirp);
@@ -51,4 +51,44 @@ extern void lxc_monitor_send_exit_code(const char *name, int exit_code,
 			    const char *lxcpath);
 extern int lxc_monitord_spawn(const char *lxcpath);
 
+/*
+ * Open the monitoring mechanism for a specific container
+ * The function will return an fd corresponding to the events
+ * Returns a file descriptor on success, < 0 otherwise
+ */
+extern int lxc_monitor_open(const char *lxcpath);
+
+/*
+ * Blocking read for the next container state change
+ * @fd  : the file descriptor provided by lxc_monitor_open
+ * @msg : the variable which will be filled with the state
+ * Returns 0 if the monitored container has exited, > 0 if
+ * data was read, < 0 otherwise
+ */
+extern int lxc_monitor_read(int fd, struct lxc_msg *msg);
+
+/*
+ * Blocking read for the next container state change with timeout
+ * @fd      : the file descriptor provided by lxc_monitor_open
+ * @msg     : the variable which will be filled with the state
+ * @timeout : the timeout in seconds to wait for a state change
+ * Returns 0 if the monitored container has exited, > 0 if
+ * data was read, < 0 otherwise
+ */
+extern int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout);
+
+/*
+ * Blocking read from multiple monitors for the next container state
+ * change with timeout
+ * @fds     : struct pollfd descripting the fds to use
+ * @nfds    : the number of entries in fds
+ * @msg     : the variable which will be filled with the state
+ * @timeout : the timeout in seconds to wait for a state change
+ * Returns 0 if the monitored container has exited, > 0 if
+ * data was read, < 0 otherwise
+ */
+extern int lxc_monitor_read_fdset(struct pollfd *fds, nfds_t nfds, struct lxc_msg *msg,
+			   int timeout);
+
+
 #endif
-- 
1.9.1



More information about the lxc-devel mailing list