[lxc-devel] [lxc/master] start: use separate socket on daemonized start

brauner on Github lxc-bot at linuxcontainers.org
Thu Jun 29 13:05:42 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1597 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20170629/17776d37/attachment.bin>
-------------- next part --------------
From 1223af832a4c06ef0a3edde87a78412ea9513c0d Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 29 Jun 2017 00:50:19 +0200
Subject: [PATCH 1/3] utils: lxc_make_abstract_socket_name()

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/commands.c | 63 ++++--------------------------------------------------
 src/lxc/start.c    |  1 +
 src/lxc/start.h    |  1 +
 src/lxc/utils.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lxc/utils.h    |  4 ++++
 5 files changed, 71 insertions(+), 59 deletions(-)

diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index d7f1ccdc8..1333b8c57 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -28,7 +28,6 @@
 #include <fcntl.h>
 #include <poll.h>
 #include <sys/socket.h>
-#include <inttypes.h>
 #include <sys/un.h>
 #include <sys/param.h>
 #include <malloc.h>
@@ -76,62 +75,6 @@
 
 lxc_log_define(lxc_commands, lxc);
 
-static int fill_sock_name(char *path, int len, const char *lxcname,
-			  const char *lxcpath, const char *hashed_sock_name)
-{
-	const char *name;
-	char *tmppath;
-	size_t tmplen;
-	uint64_t hash;
-	int ret;
-
-	name = lxcname;
-	if (!name)
-		name = "";
-
-	if (hashed_sock_name != NULL) {
-		ret = snprintf(path, len, "lxc/%s/command", hashed_sock_name);
-		if (ret < 0 || ret >= len) {
-			ERROR("Error writing to command sock path");
-			return -1;
-		}
-		return 0;
-	}
-
-	if (!lxcpath) {
-		lxcpath = lxc_global_config_value("lxc.lxcpath");
-		if (!lxcpath) {
-			ERROR("Out of memory getting lxcpath");
-			return -1;
-		}
-	}
-
-	ret = snprintf(path, len, "%s/%s/command", lxcpath, name);
-	if (ret < 0) {
-		ERROR("Error writing to command sock path");
-		return -1;
-	}
-	if (ret < len)
-		return 0;
-
-	/* ret >= len; lxcpath or name is too long.  hash both */
-	tmplen = strlen(name) + strlen(lxcpath) + 2;
-	tmppath = alloca(tmplen);
-	ret = snprintf(tmppath, tmplen, "%s/%s", lxcpath, name);
-	if (ret < 0 || ret >= tmplen) {
-		ERROR("memory error");
-		return -1;
-	}
-	hash = fnv_64a_buf(tmppath, ret, FNV1A_64_INIT);
-	ret = snprintf(path, len, "lxc/%016" PRIx64 "/command", hash);
-	if (ret < 0 || ret >= len) {
-		ERROR("Command socket name too long");
-		return -1;
-	}
-
-	return 0;
-}
-
 static const char *lxc_cmd_str(lxc_cmd_t cmd)
 {
 	static const char * const cmdname[LXC_CMD_MAX] = {
@@ -300,7 +243,8 @@ static int lxc_cmd(const char *name, struct lxc_cmd_rr *cmd, int *stopped,
 	 * because we print the sockname out sometimes.
 	 */
 	len = sizeof(path)-2;
-	if (fill_sock_name(offset, len, name, lxcpath, hashed_sock_name))
+	if (lxc_make_abstract_socket_name(offset, len, name, lxcpath,
+					  hashed_sock_name, "command"))
 		return -1;
 
 	sock = lxc_abstract_unix_connect(path);
@@ -1156,7 +1100,8 @@ int lxc_cmd_init(const char *name, struct lxc_handler *handler,
 	 * because we print the sockname out sometimes.
 	 */
 	len = sizeof(path) - 2;
-	if (fill_sock_name(offset, len, name, lxcpath, NULL))
+	if (lxc_make_abstract_socket_name(offset, len, name, lxcpath, NULL,
+					  "command"))
 		return -1;
 
 	fd = lxc_abstract_unix_open(path, SOCK_STREAM, 0);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index b7afc7bab..ad9ce6176 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -481,6 +481,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
 	handler->conf = conf;
 	handler->lxcpath = lxcpath;
 	handler->pinfd = -1;
+	handler->state_socket = -1;
 	lxc_list_init(&handler->state_clients);
 
 	for (i = 0; i < LXC_NS_MAX; i++)
diff --git a/src/lxc/start.h b/src/lxc/start.h
index 103f15b67..0be4ee191 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -51,6 +51,7 @@ struct lxc_handler {
 	int nsfd[LXC_NS_MAX];
 	int netnsfd;
 	struct lxc_list state_clients;
+	int state_socket;
 };
 
 struct lxc_operations {
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index e21985373..89abc9ee5 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -23,11 +23,13 @@
 
 #include "config.h"
 
+#define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <grp.h>
+#include <inttypes.h>
 #include <libgen.h>
 #include <stddef.h>
 #include <stdio.h>
@@ -2333,3 +2335,62 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
 
 	return fret;
 }
+
+int lxc_make_abstract_socket_name(char *path, int len, const char *lxcname,
+				  const char *lxcpath,
+				  const char *hashed_sock_name,
+				  const char *suffix)
+{
+	const char *name;
+	char *tmppath;
+	size_t tmplen;
+	uint64_t hash;
+	int ret;
+
+	name = lxcname;
+	if (!name)
+		name = "";
+
+	if (hashed_sock_name != NULL) {
+		ret =
+		    snprintf(path, len, "lxc/%s/%s", hashed_sock_name, suffix);
+		if (ret < 0 || ret >= len) {
+			ERROR("Failed to create abstract socket name");
+			return -1;
+		}
+		return 0;
+	}
+
+	if (!lxcpath) {
+		lxcpath = lxc_global_config_value("lxc.lxcpath");
+		if (!lxcpath) {
+			ERROR("Failed to allocate memory");
+			return -1;
+		}
+	}
+
+	ret = snprintf(path, len, "%s/%s/%s", lxcpath, name, suffix);
+	if (ret < 0) {
+		ERROR("Failed to create abstract socket name");
+		return -1;
+	}
+	if (ret < len)
+		return 0;
+
+	/* ret >= len; lxcpath or name is too long.  hash both */
+	tmplen = strlen(name) + strlen(lxcpath) + 2;
+	tmppath = alloca(tmplen);
+	ret = snprintf(tmppath, tmplen, "%s/%s", lxcpath, name);
+	if (ret < 0 || ret >= tmplen) {
+		ERROR("Failed to create abstract socket name");
+		return -1;
+	}
+	hash = fnv_64a_buf(tmppath, ret, FNV1A_64_INIT);
+	ret = snprintf(path, len, "lxc/%016" PRIx64 "/%s", hash, suffix);
+	if (ret < 0 || ret >= len) {
+		ERROR("Failed to create abstract socket name");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 916ee56a6..7d81a3270 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -374,5 +374,9 @@ int lxc_unstack_mountpoint(const char *path, bool lazy);
  * @param[in] args     Arguments to be passed to child_fn.
  */
 int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args);
+int lxc_make_abstract_socket_name(char *path, int len, const char *lxcname,
+				  const char *lxcpath,
+				  const char *hashed_sock_name,
+				  const char *suffix);
 
 #endif /* __LXC_UTILS_H */

From 9fe8c25b215d57372c10b269e422eed02fdf20a5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 29 Jun 2017 12:16:00 +0200
Subject: [PATCH 2/3] start: use separate socket on daemonized start

Since we killed lxc-monitord we rely on the container's command socket to wait
for the container. This doesn't work nicely on daemonized startup since a
container's init process might be something that is so short-lived that we
won't even be able to add a state client before the mainloop closes. But the
container might still have been RUNNING and executed the init binary correctly.
In this case we would erroneously report that the container failed to start
when it actually started just fine.
This commit ensures that we really all cases where the container successfully
ran by switching to a short-lived per-container unix abstract domain socket
that uses credentials to pass container states around. It is immediately closed
once the container has started successfully.
This should also make daemonized container start way more robust since we don't
rely on the command socket handler to be running.

For the experienced developer: Yes, I did think about utilizing the command
socket directly for this. The problem is that when the mainloop starts it may
end up end accept()ing the connection that we want
do_wait_on_daemonized_start() to accept() so this won't work and might end us
to hang indefinitely.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/criu.c         |  2 +-
 src/lxc/lxccontainer.c | 76 +++++++++++++++++++++++++++++++++++++-------------
 src/lxc/start.c        | 61 +++++++++++++++++++++++++++++++++++++++-
 src/lxc/start.h        |  6 +++-
 4 files changed, 122 insertions(+), 23 deletions(-)

diff --git a/src/lxc/criu.c b/src/lxc/criu.c
index 6fa42b246..ffee65b2c 100644
--- a/src/lxc/criu.c
+++ b/src/lxc/criu.c
@@ -797,7 +797,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
 		close(fd);
 	}
 
-	handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path);
+	handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false);
 	if (!handler)
 		goto out;
 
diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
index 5301d092c..9ee56a090 100644
--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@@ -38,6 +38,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 
+#include "af_unix.h"
 #include "attach.h"
 #include "bdev.h"
 #include "lxcoverlay.h"
@@ -611,23 +612,6 @@ static bool do_lxcapi_wait(struct lxc_container *c, const char *state, int timeo
 
 WRAP_API_2(bool, lxcapi_wait, const char *, int)
 
-static bool do_wait_on_daemonized_start(struct lxc_container *c, int pid)
-{
-	/* we'll probably want to make this timeout configurable? */
-	int timeout = 5, ret, status;
-
-	/*
-	 * our child is going to fork again, then exit.  reap the
-	 * child
-	 */
-	ret = waitpid(pid, &status, 0);
-	if (ret == -1 || !WIFEXITED(status) || WEXITSTATUS(status) != 0)
-		DEBUG("failed waiting for first dual-fork child");
-	return do_lxcapi_wait(c, "RUNNING", timeout);
-}
-
-WRAP_API_1(bool, wait_on_daemonized_start, int)
-
 static bool am_single_threaded(void)
 {
 	struct dirent *direntp;
@@ -714,6 +698,54 @@ static void free_init_cmd(char **argv)
 	free(argv);
 }
 
+static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
+{
+	int ret, status;
+	int state_client = -1;
+	lxc_state_t state = -1;
+
+	/* Accept state client. */
+	state_client = accept(handler->state_socket, NULL, 0);
+	/* Close state server socket. */
+	close(handler->state_socket);
+	handler->state_socket = -1;
+	if (state_client < 0) {
+		SYSERROR("Failed to accept container state client");
+		close(state_client);
+		return true;
+	}
+
+	/* Receive container state. */
+	ret = lxc_abstract_unix_rcv_credential(state_client, &state,
+						sizeof(lxc_state_t));
+	/* Close container state client. */
+	close(state_client);
+
+	/* The first child is going to fork() again and then exits. So we reap
+	 * the first child here.
+	 */
+	if (waitpid(pid, &status, 0) < 0)
+		DEBUG("Failed waiting on first child");
+	else if (!WIFEXITED(status))
+		DEBUG("Failed to retrieve exit status of first child");
+	else if (WEXITSTATUS(status) != 0)
+		DEBUG("First child exited with: %d", WEXITSTATUS(status));
+
+	if (ret < 0) {
+		SYSERROR("Failed to receive the container state");
+		return false;
+	}
+
+	if (state != RUNNING) {
+		ERROR("Received container state \"%s\" instead of \"RUNNING\"",
+		      lxc_state2str(state));
+		return false;
+	}
+
+	TRACE("Retrieved container state \"%s\"", lxc_state2str(state));
+	return true;
+}
+
 static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const argv[])
 {
 	int ret;
@@ -761,7 +793,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
 	daemonize = c->daemonize;
 
 	/* initialize handler */
-	handler = lxc_init_handler(c->name, conf, c->config_path);
+	handler = lxc_init_handler(c->name, conf, c->config_path, daemonize);
 	container_mem_unlock(c);
 	if (!handler)
 		return false;
@@ -805,8 +837,12 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
 			 * the PID file, child will do the free and unlink.
 			 */
 			c->pidfile = NULL;
+
+			/* Prevent leaking the command socket to the second
+			 * fork().
+			 */
 			close(handler->conf->maincmd_fd);
-			return wait_on_daemonized_start(c, pid);
+			return wait_on_daemonized_start(handler, pid);
 		}
 
 		/* We don't really care if this doesn't print all the
@@ -895,7 +931,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
 reboot:
 	if (conf->reboot == 2) {
 		/* initialize handler */
-		handler = lxc_init_handler(c->name, conf, c->config_path);
+		handler = lxc_init_handler(c->name, conf, c->config_path, daemonize);
 		if (!handler)
 			goto out;
 	}
diff --git a/src/lxc/start.c b/src/lxc/start.c
index ad9ce6176..cabee26e5 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -344,12 +344,36 @@ static int signal_handler(int fd, uint32_t events, void *data,
 int lxc_set_state(const char *name, struct lxc_handler *handler,
 		  lxc_state_t state)
 {
+	int state_socket;
 	ssize_t ret;
 	struct lxc_list *cur, *next;
 	struct state_client *client;
 	struct lxc_msg msg = {.type = lxc_msg_state, .value = state};
 
+	if (state != STARTING) {
+		state_socket = lxc_abstract_unix_connect(handler->state_socket_name);
+		if (state_socket < 0) {
+			if (errno == ECONNREFUSED)
+				goto serve_state_clients;
+			SYSERROR("Failed to connect to state socket");
+			return -1;
+		}
+		TRACE("Connected to container state socket \"%s\" via %d",
+				&handler->state_socket_name[1], state_socket);
+
+		ret = lxc_abstract_unix_send_credential(state_socket, &(int){state}, sizeof(int));
+		if (ret != sizeof(int)) {
+			SYSERROR("Failed to send state");
+		}
+		TRACE("Sent container state \"%s\" to %d", lxc_state2str(state),
+				state_socket);
+		close(state_socket);
+	}
+
+serve_state_clients:
+
 	process_lock();
+
 	/* Only set state under process lock held so that we don't cause
 	 * lxc_cmd_state_server() to miss a state.
 	 */
@@ -456,6 +480,9 @@ void lxc_free_handler(struct lxc_handler *handler)
 	if (handler->conf && handler->conf->maincmd_fd)
 		close(handler->conf->maincmd_fd);
 
+	if (handler->state_socket >= 0)
+		close(handler->state_socket);
+
 	if (handler->name)
 		free(handler->name);
 
@@ -464,7 +491,7 @@ void lxc_free_handler(struct lxc_handler *handler)
 }
 
 struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
-				     const char *lxcpath)
+				     const char *lxcpath, bool daemonize)
 {
 	int i;
 	struct lxc_handler *handler;
@@ -493,6 +520,38 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
 		goto on_error;
 	}
 
+	if (daemonize) {
+		int ret;
+		char path[sizeof(((struct sockaddr_un *)0)->sun_path)] = { 0 };
+		size_t len = sizeof(path) - 2;
+		char *offset = &path[1];
+
+		ret = lxc_make_abstract_socket_name(offset, len, name, lxcpath,
+						    NULL, "state");
+		if (ret < 0)
+			goto on_error;
+
+		handler->state_socket = lxc_abstract_unix_open(path, SOCK_STREAM, 0);
+		if (handler->state_socket < 0) {
+			SYSERROR("Failed to create container state socket %s", offset);
+			if (errno == EADDRINUSE)
+				ERROR("Container \"%s\" appears to be already "
+				      "running!",
+				      name);
+			goto on_error;
+		}
+		TRACE("Container state socket \"%s\" is ready", offset);
+
+		ret = fcntl(handler->state_socket, F_SETFD, FD_CLOEXEC);
+		if (ret < 0) {
+			SYSERROR("Failed to set FD_CLOEXEC on container state socket");
+			goto on_error;
+		}
+
+		memcpy(handler->state_socket_name, path,
+		       sizeof(handler->state_socket_name));
+	}
+
 	if (lxc_cmd_init(name, handler, lxcpath)) {
 		ERROR("failed to set up command socket");
 		goto on_error;
diff --git a/src/lxc/start.h b/src/lxc/start.h
index 0be4ee191..5ecc2d57b 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -25,6 +25,8 @@
 
 #include <signal.h>
 #include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/un.h>
 #include <stdbool.h>
 
 #include "conf.h"
@@ -52,6 +54,7 @@ struct lxc_handler {
 	int netnsfd;
 	struct lxc_list state_clients;
 	int state_socket;
+	char state_socket_name[sizeof(((struct sockaddr_un *)0)->sun_path)];
 };
 
 struct lxc_operations {
@@ -69,7 +72,8 @@ extern int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_stat
 extern void lxc_abort(const char *name, struct lxc_handler *handler);
 extern struct lxc_handler *lxc_init_handler(const char *name,
 					    struct lxc_conf *conf,
-					    const char *lxcpath);
+					    const char *lxcpath,
+					    bool daemonize);
 extern void lxc_free_handler(struct lxc_handler *handler);
 extern int lxc_init(const char *name, struct lxc_handler *handler);
 extern void lxc_fini(const char *name, struct lxc_handler *handler);

From 68c002614c62660acb16a9c92446355f27b14c40 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 29 Jun 2017 15:01:11 +0200
Subject: [PATCH 3/3] test: shortlived daemonized containers

Add a test to see if we can start daemonized containers that have a very
short-lived init process. The point of this is to see whether we can correctly
retrieve the state.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 .gitignore            | 1 +
 src/tests/Makefile.am | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 510bef25e..c88aea06f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,6 +96,7 @@ src/tests/lxc-test-utils*
 src/tests/lxc-usernic-test
 src/tests/lxc-test-config-jump-table
 src/tests/lxc-test-parse-config-file
+src/tests/lxc-test-shortlived
 
 config/compile
 config/config.guess
diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index fbd6631b8..be6735e6e 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -26,6 +26,7 @@ lxc_test_apparmor_SOURCES = aa.c
 lxc_test_utils_SOURCES = lxc-test-utils.c lxctest.h
 lxc_test_parse_config_file_SOURCES = parse_config_file.c lxctest.h
 lxc_test_config_jump_table_SOURCES = config_jump_table.c lxctest.h
+lxc_test_shortlived_SOURCES = shortlived.c
 
 AM_CFLAGS=-DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
 	-DLXCPATH=\"$(LXCPATH)\" \
@@ -54,7 +55,7 @@ bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \
 	lxc-test-snapshot lxc-test-concurrent lxc-test-may-control \
 	lxc-test-reboot lxc-test-list lxc-test-attach lxc-test-device-add-remove \
 	lxc-test-apparmor lxc-test-utils lxc-test-parse-config-file \
-	lxc-test-config-jump-table
+	lxc-test-config-jump-table lxc-test-shortlived
 
 bin_SCRIPTS = lxc-test-automount \
 	      lxc-test-autostart \
@@ -107,6 +108,7 @@ EXTRA_DIST = \
 	may_control.c \
 	parse_config_file.c \
 	saveconfig.c \
+	shortlived.c \
 	shutdowntest.c \
 	snapshot.c \
 	startone.c


More information about the lxc-devel mailing list