[lxc-devel] [lxc/master] call lxc_setup() after unshare(CLONE_NEWCGROUP)

brauner on Github lxc-bot at linuxcontainers.org
Thu Jun 1 04:07:46 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1393 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20170601/2b7dfe23/attachment.bin>
-------------- next part --------------
From 65e352eddcc9afa2e695ccfb13222f552fe90dd6 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 1 Jun 2017 05:23:12 +0200
Subject: [PATCH 1/2] start: lxc_setup() after unshare(CLONE_NEWCGROUP)

When the running kernel supports cgroup namespace and users want to manually
set up cgroups via lxc.hook.mount before the init binary starts the cgroup
namespace needs to be already unshared. Otherwise the view on the cgroup mounts
is wrong. This commit places the call to lxc_setup() after the
LXC_SYNC_POST_CGROUP barrier.

This idea also made me aware of a security improvement which should harden our
tty handling code a bit. Before this commit, the tty fds we allocate from a
fresh devpts instance in the container's namespaces before the init binary
starts were referring to the hosts cgroup namespace since lxc_setup() was
called before unshare(CLONE_NEWCGROUP). This leaves room for the vague
possibility of setns()ing to the parent's cgroup namespaces via one of those
fds. This commit makes sure that the ttys are allocated after *all* namespaces
have been created.

Adding a Suggested-by line for the lxc.mount.hook fix for Quentin.

Closes #1597.

Suggested-by: Quentin Dufour <quentin at dufour.tk>
Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/start.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/lxc/start.c b/src/lxc/start.c
index f1b3f8e11..e6864b349 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -820,12 +820,6 @@ static int do_start(void *data)
 		     "standard file descriptors. Migration will not work.");
 	}
 
-	/* Setup the container, ip, names, utsname, ... */
-	if (lxc_setup(handler)) {
-		ERROR("Failed to setup container \"%s\".", handler->name);
-		goto out_warn_father;
-	}
-
 	/* Ask father to setup cgroups and wait for him to finish. */
 	if (lxc_sync_barrier_parent(handler, LXC_SYNC_CGROUP))
 		goto out_error;
@@ -850,6 +844,12 @@ static int do_start(void *data)
 		INFO("Unshared CLONE_NEWCGROUP.");
 	}
 
+	/* Setup the container, ip, names, utsname, ... */
+	if (lxc_setup(handler)) {
+		ERROR("Failed to setup container \"%s\".", handler->name);
+		goto out_warn_father;
+	}
+
 	/* Set the label to change to when we exec(2) the container's init. */
 	if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0)
 		goto out_warn_father;
@@ -1285,12 +1285,6 @@ static int lxc_spawn(struct lxc_handler *handler)
 	cgroup_disconnect();
 	cgroups_connected = false;
 
-	/* Read tty fds allocated by child. */
-	if (recv_ttys_from_child(handler) < 0) {
-		ERROR("Failed to receive tty info from child process.");
-		goto out_delete_net;
-	}
-
 	/* Tell the child to complete its initialization and wait for it to exec
 	 * or return an error. (The child will never return
 	 * LXC_SYNC_POST_CGROUP+1. It will either close the sync pipe, causing
@@ -1300,6 +1294,12 @@ static int lxc_spawn(struct lxc_handler *handler)
 	if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
 		return -1;
 
+	/* Read tty fds allocated by child. */
+	if (recv_ttys_from_child(handler) < 0) {
+		ERROR("Failed to receive tty info from child process.");
+		goto out_delete_net;
+	}
+
 	if (handler->ops->post_start(handler, handler->data))
 		goto out_abort;
 

From 4ff26b5a236babf4b7469af62c52fd8c6dd1c558 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Thu, 1 Jun 2017 05:40:59 +0200
Subject: [PATCH 2/2] start: log sending and receiving of tty fds

This is a security sensitive operation and I really want to keep an eye on
*when exactly* this is send. So add more logging on the TRACE() level.

Note that with recent changes, the tty fds are received *after* the init binary
starts but this doesn't have security implications for us since the ttys are
allocated in the container's namespaces and sent to the parent before the init
binary starts. The receiving takes place in the parent's namespaces and doesn't
need to care whether the init binary already runs or not.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/conf.c  | 19 ++++++++++++++-----
 src/lxc/start.c | 16 ++++++++++++----
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index b65c4add8..cf818066f 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -4066,21 +4066,30 @@ static int send_fd(int sock, int fd)
 
 static int send_ttys_to_parent(struct lxc_handler *handler)
 {
+	int i, ret;
 	struct lxc_conf *conf = handler->conf;
 	const struct lxc_tty_info *tty_info = &conf->tty_info;
-	int i;
 	int sock = handler->ttysock[0];
 
 	for (i = 0; i < tty_info->nbtty; i++) {
 		struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
-		if (send_fd(sock, pty_info->slave) < 0)
-			goto bad;
+		ret = send_fd(sock, pty_info->slave);
+		if (ret >= 0)
+			send_fd(sock, pty_info->master);
+		TRACE("sending pty \"%s\" with master fd %d and slave fd %d to "
+		      "parent",
+		      pty_info->name, pty_info->master, pty_info->slave);
 		close(pty_info->slave);
 		pty_info->slave = -1;
-		if (send_fd(sock, pty_info->master) < 0)
-			goto bad;
 		close(pty_info->master);
 		pty_info->master = -1;
+		if (ret < 0) {
+			ERROR("failed to send pty \"%s\" with master fd %d and "
+			      "slave fd %d to parent : %s",
+			      pty_info->name, pty_info->master, pty_info->slave,
+			      strerror(errno));
+			goto bad;
+		}
 	}
 
 	close(handler->ttysock[0]);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index e6864b349..38b45939f 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1021,8 +1021,9 @@ static int recv_fd(int sock, int *fd)
 
 static int recv_ttys_from_child(struct lxc_handler *handler)
 {
+	int i, ret;
+	int sock = handler->ttysock[1];
 	struct lxc_conf *conf = handler->conf;
-	int i, sock = handler->ttysock[1];
 	struct lxc_tty_info *tty_info = &conf->tty_info;
 
 	if (!conf->tty)
@@ -1035,11 +1036,18 @@ static int recv_ttys_from_child(struct lxc_handler *handler)
 	for (i = 0; i < conf->tty; i++) {
 		struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
 		pty_info->busy = 0;
-		if (recv_fd(sock, &pty_info->slave) < 0 ||
-		    recv_fd(sock, &pty_info->master) < 0) {
-			ERROR("Error receiving tty info from child process.");
+		ret = recv_fd(sock, &pty_info->slave);
+		if (ret >= 0)
+			recv_fd(sock, &pty_info->master);
+		if (ret < 0) {
+			ERROR("failed to receive pty with master fd %d and "
+			      "slave fd %d from child: %s",
+			      pty_info->master, pty_info->slave,
+			      strerror(errno));
 			return -1;
 		}
+		TRACE("received pty with master fd %d and slave fd %d from child",
+		      pty_info->master, pty_info->slave);
 	}
 	tty_info->nbtty = conf->tty;
 


More information about the lxc-devel mailing list