[lxc-devel] [lxc/master] start: intelligently use clone() on ns sharing

brauner on Github lxc-bot at linuxcontainers.org
Mon Dec 11 13:49:59 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1198 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20171211/48051d61/attachment.bin>
-------------- next part --------------
From 6804ba45795ebfa9369a5447bf94372ae61228d1 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 11 Dec 2017 12:55:23 +0100
Subject: [PATCH 1/2] start: intelligently use clone() on ns sharing

When I first solved this problem I went for a fork() + setns() + clone() model.
This works fine but has unnecessary overhead for a couple of reasons:

- doing a full fork() including copying file descriptor table and virtual
  memory
- using pipes to retrieve the pid of the second child (the actual container
  process)

This can all be avoided by being a little smart in how we employ the clone()
syscall:

- using CLONE_VM will let us get rid of using pipes since we can simply write
  to the handler because we share the memory with our parent
- using CLONE_VFORK will also let us get rid of using pipes since the execution
  of the parent is suspended until the child returns
- using CLONE_VM will let not cause virtual memory to be copied
- using CLONE_FILES will not cause the file descriptor table to be copied

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/start.c | 61 +++++++++++++++++++++++----------------------------------
 src/lxc/start.h |  6 ++++++
 2 files changed, 30 insertions(+), 37 deletions(-)

diff --git a/src/lxc/start.c b/src/lxc/start.c
index b82768687..d56e87d53 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1157,43 +1157,30 @@ void resolve_clone_flags(struct lxc_handler *handler)
 		INFO("Inheriting pid namespace");
 }
 
-static pid_t lxc_fork_attach_clone(int (*fn)(void *), void *arg, int flags)
+
+static inline int do_share_ns(void *arg)
 {
-	int i, r, ret;
-	pid_t pid, init_pid;
+	int i, flags, ret;
 	struct lxc_handler *handler = arg;
 
-	pid = fork();
-	if (pid < 0)
-		return -1;
-
-	if (!pid) {
-		for (i = 0; i < LXC_NS_MAX; i++) {
-			if (handler->nsfd[i] < 0)
-				continue;
-
-			ret = setns(handler->nsfd[i], 0);
-			if (ret < 0)
-				exit(EXIT_FAILURE);
-
-			DEBUG("Inherited %s namespace", ns_info[i].proc_name);
-		}
+	for (i = 0; i < LXC_NS_MAX; i++) {
+		if (handler->nsfd[i] < 0)
+			continue;
 
-		init_pid = lxc_clone(do_start, handler, flags);
-		ret = lxc_write_nointr(handler->data_sock[1], &init_pid,
-				       sizeof(init_pid));
+		ret = setns(handler->nsfd[i], 0);
 		if (ret < 0)
-			exit(EXIT_FAILURE);
+			return -1;
 
-		exit(EXIT_SUCCESS);
+		DEBUG("Inherited %s namespace", ns_info[i].proc_name);
 	}
 
-	r = lxc_read_nointr(handler->data_sock[0], &init_pid, sizeof(init_pid));
-	ret = wait_for_pid(pid);
-	if (ret < 0 || r < 0)
+	flags = handler->on_clone_flags;
+	flags |= CLONE_PARENT;
+	handler->pid = lxc_clone(do_start, handler, flags);
+	if (handler->pid < 0)
 		return -1;
 
-	return init_pid;
+	return 0;
 }
 
 /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
@@ -1205,13 +1192,13 @@ static pid_t lxc_fork_attach_clone(int (*fn)(void *), void *arg, int flags)
  */
 static int lxc_spawn(struct lxc_handler *handler)
 {
-	int i, flags, ret;
+	int i, ret;
 	char pidstr[20];
 	bool wants_to_map_ids;
 	struct lxc_list *id_map;
 	const char *name = handler->name;
 	const char *lxcpath = handler->lxcpath;
-	bool cgroups_connected = false, fork_before_clone = false;
+	bool cgroups_connected = false, share_ns = false;
 	struct lxc_conf *conf = handler->conf;
 
 	id_map = &conf->id_map;
@@ -1225,7 +1212,7 @@ static int lxc_spawn(struct lxc_handler *handler)
 		if (handler->nsfd[i] < 0)
 			return -1;
 
-		fork_before_clone = true;
+		share_ns = true;
 	}
 
 	if (lxc_sync_init(handler))
@@ -1288,28 +1275,28 @@ static int lxc_spawn(struct lxc_handler *handler)
 	}
 
 	/* Create a process in a new set of namespaces. */
-	flags = handler->clone_flags;
+	handler->on_clone_flags = handler->clone_flags;
 	if (handler->clone_flags & CLONE_NEWUSER) {
 		/* If CLONE_NEWUSER and CLONE_NEWNET was requested, we need to
 		 * clone a new user namespace first and only later unshare our
 		 * network namespace to ensure that network devices ownership is
 		 * set up correctly.
 		 */
-		flags &= ~CLONE_NEWNET;
+		handler->on_clone_flags &= ~CLONE_NEWNET;
 	}
 
-	if (fork_before_clone)
-		handler->pid = lxc_fork_attach_clone(do_start, handler, flags | CLONE_PARENT);
+	if (share_ns)
+		ret = lxc_clone(do_share_ns, handler, CLONE_VFORK | CLONE_VM | CLONE_FILES);
 	else
-		handler->pid = lxc_clone(do_start, handler, flags);
-	if (handler->pid < 0) {
+		handler->pid = lxc_clone(do_start, handler, handler->on_clone_flags);
+	if (handler->pid < 0 || ret < 0) {
 		SYSERROR("Failed to clone a new set of namespaces.");
 		goto out_delete_net;
 	}
 	TRACE("Cloned child process %d", handler->pid);
 
 	for (i = 0; i < LXC_NS_MAX; i++)
-		if (flags & ns_info[i].clone_flag)
+		if (handler->on_clone_flags & ns_info[i].clone_flag)
 			INFO("Cloned %s", ns_info[i].flag_name);
 
 	if (!preserve_ns(handler->nsfd, handler->clone_flags & ~CLONE_NEWNET, handler->pid)) {
diff --git a/src/lxc/start.h b/src/lxc/start.h
index e2b13141b..2161565d4 100644
--- a/src/lxc/start.h
+++ b/src/lxc/start.h
@@ -40,6 +40,12 @@ struct lxc_handler {
 	/* The clone flags that were requested. */
 	int clone_flags;
 
+	/* The clone flags to actually use when calling lxc_clone(). They may
+	 * differ from clone_flags because of ordering requirements (e.g.
+	 * CLONE_NEWNET and CLONE_NEWUSER).
+	 */
+	int on_clone_flags;
+
 	/* File descriptor to pin the rootfs for privileged containers. */
 	int pinfd;
 

From fae592368e1ddfe7a91e85c7186e2d8d24f004d0 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 11 Dec 2017 14:47:24 +0100
Subject: [PATCH 2/2] tests: add namespace sharing tests

This also ensures that the new more efficient clone() way of sharing namespaces
is tested.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/tests/Makefile.am |   6 +-
 src/tests/share_ns.c  | 317 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 321 insertions(+), 2 deletions(-)
 create mode 100644 src/tests/share_ns.c

diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index f223463d7..b38c93c67 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -31,6 +31,7 @@ lxc_test_config_jump_table_SOURCES = config_jump_table.c lxctest.h
 lxc_test_shortlived_SOURCES = shortlived.c
 lxc_test_livepatch_SOURCES = livepatch.c lxctest.h
 lxc_test_state_server_SOURCES = state_server.c lxctest.h
+lxc_test_share_ns_SOURCES = share_ns.c lxctest.h
 
 AM_CFLAGS=-DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \
 	-DLXCPATH=\"$(LXCPATH)\" \
@@ -60,7 +61,7 @@ bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \
 	lxc-test-reboot lxc-test-list lxc-test-attach lxc-test-device-add-remove \
 	lxc-test-apparmor lxc-test-utils lxc-test-parse-config-file \
 	lxc-test-config-jump-table lxc-test-shortlived lxc-test-livepatch \
-	lxc-test-api-reboot lxc-test-state-server
+	lxc-test-api-reboot lxc-test-state-server lxc-test-share-ns
 
 bin_SCRIPTS = lxc-test-automount \
 	      lxc-test-autostart \
@@ -121,7 +122,8 @@ EXTRA_DIST = \
 	shutdowntest.c \
 	snapshot.c \
 	startone.c \
-	state_server.c
+	state_server.c \
+	share_ns.c
 
 clean-local:
 	rm -f lxc-test-utils-*
diff --git a/src/tests/share_ns.c b/src/tests/share_ns.c
new file mode 100644
index 000000000..620527699
--- /dev/null
+++ b/src/tests/share_ns.c
@@ -0,0 +1,317 @@
+/* liblxcapi
+ *
+ * Copyright © 2017 Christian Brauner <christian.brauner at ubuntu.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/reboot.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "lxc/lxccontainer.h"
+#include "lxctest.h"
+
+struct thread_args {
+	int thread_id;
+	int timeout;
+	bool success;
+	pid_t init_pid;
+	char *inherited_ipc_ns;
+	char *inherited_net_ns;
+};
+
+void *ns_sharing_wrapper(void *data)
+{
+	int init_pid;
+	ssize_t ret;
+	char name[100];
+	char owning_ns_init_pid[100];
+	char proc_ns_path[4096];
+	char ns_buf[4096];
+	struct lxc_container *c;
+	struct thread_args *args = data;
+
+	lxc_debug("Starting namespace sharing thread %d\n", args->thread_id);
+
+	sprintf(name, "share-ns-%d", args->thread_id);
+	c = lxc_container_new(name, NULL);
+	if (!c) {
+		lxc_error("Failed to create container \"%s\"\n", name);
+		goto out;
+	}
+
+	if (c->is_defined(c)) {
+		lxc_error("Container \"%s\" is defined\n", name);
+		goto out;
+	}
+
+	if (!c->createl(c, "busybox", NULL, NULL, 0, NULL)) {
+		lxc_error("Failed to create busybox container \"%s\"\n", name);
+		goto out;
+	}
+
+	if (!c->is_defined(c)) {
+		lxc_error("Container \"%s\" is not defined\n", name);
+		goto out;
+	}
+
+	if (!c->load_config(c, NULL)) {
+		lxc_error("Failed to load config for container \"%s\"\n", name);
+		goto out;
+	}
+
+	/* share ipc namespace by container name */
+	if (!c->set_config_item(c, "lxc.namespace.ipc", "owning-ns")) {
+		lxc_error("Failed to set \"lxc.namespace.ipc=owning-ns\" for container \"%s\"\n", name);
+		goto out;
+	}
+
+	/* clear all network configuration */
+	if (!c->set_config_item(c, "lxc.net", "")) {
+		lxc_error("Failed to set \"lxc.namespace.ipc=owning-ns\" for container \"%s\"\n", name);
+		goto out;
+	}
+
+	if (!c->set_config_item(c, "lxc.net.0.type", "empty")) {
+		lxc_error("Failed to set \"lxc.net.0.type=empty\" for container \"%s\"\n", name);
+		goto out;
+	}
+
+	sprintf(owning_ns_init_pid, "%d", args->init_pid);
+	/* share net namespace by pid */
+	if (!c->set_config_item(c, "lxc.namespace.net", owning_ns_init_pid)) {
+		lxc_error("Failed to set \"lxc.namespace.net=%s\" for container \"%s\"\n", owning_ns_init_pid, name);
+		goto out;
+	}
+
+	if (!c->want_daemonize(c, true)) {
+		lxc_error("Failed to mark container \"%s\" daemonized\n", name);
+		goto out;
+	}
+
+	if (!c->startl(c, 0, NULL)) {
+		lxc_error("Failed to start container \"%s\" daemonized\n", name);
+		goto out;
+	}
+
+	init_pid = c->init_pid(c);
+	if (init_pid < 0) {
+		lxc_error("Failed to retrieve init pid of container \"%s\"\n", name);
+		goto out;
+	}
+
+	/* Check whether we correctly inherited the ipc namespace. */
+	ret = snprintf(proc_ns_path, sizeof(proc_ns_path), "/proc/%d/ns/ipc", init_pid);
+	if (ret < 0 || (size_t)ret >= sizeof(proc_ns_path)) {
+		lxc_error("Failed to create string for container \"%s\"\n", name);
+		goto out;
+	}
+
+	ret = readlink(proc_ns_path, ns_buf, sizeof(ns_buf));
+	if (ret < 0 || (size_t)ret >= sizeof(ns_buf)) {
+		lxc_error("Failed to retrieve ipc namespace for container \"%s\"\n", name);
+		goto out;
+	}
+	ns_buf[ret] = '\0';
+
+	if (strcmp(args->inherited_ipc_ns, ns_buf) != 0) {
+		lxc_error("Failed to inherit ipc namespace from container \"owning-ns\": %s != %s\n", args->inherited_ipc_ns, ns_buf);
+		goto out;
+	}
+	lxc_debug("Inherited ipc namespace from container \"owning-ns\": %s == %s\n", args->inherited_ipc_ns, ns_buf);
+
+	/* Check whether we correctly inherited the net namespace. */
+	ret = snprintf(proc_ns_path, sizeof(proc_ns_path), "/proc/%d/ns/net", init_pid);
+	if (ret < 0 || (size_t)ret >= sizeof(proc_ns_path)) {
+		lxc_error("Failed to create string for container \"%s\"\n", name);
+		goto out;
+	}
+
+	ret = readlink(proc_ns_path, ns_buf, sizeof(ns_buf));
+	if (ret < 0 || (size_t)ret >= sizeof(ns_buf)) {
+		lxc_error("Failed to retrieve ipc namespace for container \"%s\"\n", name);
+		goto out;
+	}
+	ns_buf[ret] = '\0';
+
+	if (strcmp(args->inherited_net_ns, ns_buf) != 0) {
+		lxc_error("Failed to inherit net namespace from container \"owning-ns\": %s != %s\n", args->inherited_net_ns, ns_buf);
+		goto out;
+	}
+	lxc_debug("Inherited net namespace from container \"owning-ns\": %s == %s\n", args->inherited_net_ns, ns_buf);
+
+	args->success = true;
+
+out:
+	if (c->is_running(c) && !c->stop(c)) {
+		lxc_error("Failed to stop container \"%s\"\n", name);
+		goto out;
+	}
+
+	if (!c->destroy(c)) {
+		lxc_error("Failed to destroy container \"%s\"\n", name);
+		goto out;
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	int i, init_pid, j;
+	char proc_ns_path[4096];
+	char ipc_ns_buf[4096];
+	char net_ns_buf[4096];
+	pthread_attr_t attr;
+	pthread_t threads[10];
+	struct thread_args args[10];
+	struct lxc_container *c;
+	int ret = EXIT_FAILURE;
+
+	c = lxc_container_new("owning-ns", NULL);
+	if (!c) {
+		lxc_error("%s", "Failed to create container \"owning-ns\"");
+		exit(ret);
+	}
+
+	if (c->is_defined(c)) {
+		lxc_error("%s\n", "Container \"owning-ns\" is defined");
+		goto on_error_put;
+	}
+
+	if (!c->createl(c, "busybox", NULL, NULL, 0, NULL)) {
+		lxc_error("%s\n", "Failed to create busybox container \"owning-ns\"");
+		goto on_error_put;
+	}
+
+	if (!c->is_defined(c)) {
+		lxc_error("%s\n", "Container \"owning-ns\" is not defined");
+		goto on_error_put;
+	}
+
+	c->clear_config(c);
+
+	if (!c->load_config(c, NULL)) {
+		lxc_error("%s\n", "Failed to load config for container \"owning-ns\"");
+		goto on_error_stop;
+	}
+
+	if (!c->want_daemonize(c, true)) {
+		lxc_error("%s\n", "Failed to mark container \"owning-ns\" daemonized");
+		goto on_error_stop;
+	}
+
+	if (!c->startl(c, 0, NULL)) {
+		lxc_error("%s\n", "Failed to start container \"owning-ns\" daemonized");
+		goto on_error_stop;
+	}
+
+	init_pid = c->init_pid(c);
+	if (init_pid < 0) {
+		lxc_error("%s\n", "Failed to retrieve init pid of container \"owning-ns\"");
+		goto on_error_stop;
+	}
+
+	/* record our ipc namespace */
+	ret = snprintf(proc_ns_path, sizeof(proc_ns_path), "/proc/%d/ns/ipc", init_pid);
+	if (ret < 0 || (size_t)ret >= sizeof(proc_ns_path)) {
+		lxc_error("%s\n", "Failed to create string for container \"owning-ns\"");
+		goto on_error_stop;
+	}
+
+	ret = readlink(proc_ns_path, ipc_ns_buf, sizeof(ipc_ns_buf));
+	if (ret < 0 || (size_t)ret >= sizeof(ipc_ns_buf)) {
+		lxc_error("%s\n", "Failed to retrieve ipc namespace for container \"owning-ns\"");
+		goto on_error_stop;
+
+	}
+	ipc_ns_buf[ret] = '\0';
+
+	/* record our net namespace */
+	ret = snprintf(proc_ns_path, sizeof(proc_ns_path), "/proc/%d/ns/net", init_pid);
+	if (ret < 0 || (size_t)ret >= sizeof(proc_ns_path)) {
+		lxc_error("%s\n", "Failed to create string for container \"owning-ns\"");
+		goto on_error_stop;
+	}
+
+	ret = readlink(proc_ns_path, net_ns_buf, sizeof(net_ns_buf));
+	if (ret < 0 || (size_t)ret >= sizeof(net_ns_buf)) {
+		lxc_error("%s\n", "Failed to retrieve ipc namespace for container \"owning-ns\"");
+		goto on_error_stop;
+	}
+	net_ns_buf[ret] = '\0';
+
+	sleep(5);
+
+	pthread_attr_init(&attr);
+
+	for (j = 0; j < 10; j++) {
+		lxc_debug("Starting namespace sharing test iteration %d\n", j);
+
+		for (i = 0; i < 10; i++) {
+			int ret;
+
+			args[i].thread_id = i;
+			args[i].init_pid = init_pid;
+			args[i].timeout = -1;
+			args[i].inherited_ipc_ns = ipc_ns_buf;
+			args[i].inherited_net_ns = net_ns_buf;
+			/* test non-blocking shutdown request */
+			if (i == 0)
+				args[i].timeout = 0;
+
+			ret = pthread_create(&threads[i], &attr, ns_sharing_wrapper, (void *) &args[i]);
+			if (ret != 0)
+				goto on_error_stop;
+		}
+
+		for (i = 0; i < 10; i++) {
+			int ret;
+
+			ret = pthread_join(threads[i], NULL);
+			if (ret != 0)
+				goto on_error_stop;
+
+			if (!args[i].success) {
+				lxc_error("ns sharing thread %d failed\n", args[i].thread_id);
+				goto on_error_stop;
+			}
+		}
+	}
+
+	ret = EXIT_SUCCESS;
+
+on_error_stop:
+	if (c->is_running(c) && !c->stop(c))
+		lxc_error("%s\n", "Failed to stop container \"owning-ns\"");
+
+	if (!c->destroy(c))
+		lxc_error("%s\n", "Failed to destroy container \"owning-ns\"");
+
+on_error_put:
+	lxc_container_put(c);
+	if (ret == EXIT_SUCCESS)
+		lxc_debug("%s\n", "All state namespace sharing tests passed");
+	exit(ret);
+}


More information about the lxc-devel mailing list