[lxc-devel] [PATCH 1/1] lxc_attach: fix break with user namespaces

Serge Hallyn serge.hallyn at canonical.com
Mon Jan 21 21:21:52 UTC 2013


When you clone a new user_ns, the child cannot write to the fds
opened by the parent.  Handle this by doing an extra fork.  The
grandparent hangs around and waits for its child to tell it the
pid of the grandchild, which will be the one attached to the
container.  The grandparent then moves the grandchild into the
right cgroup, then waits for the child who in turn is waiting on
the grandchild to complete.

This lets lxc-attach work into another user namespace, but more
is needed (which will come in subsequent patches).  lxc-attach
will need to setuid to the uid of the container's init process,
because otherwise it is uid -1.  It will also need to be entered
into the apparmor or selinux domain of the child to prevent it
being used by a task in the container as a stepping stone to
greater privilege (i.e. through ptrace).

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/lxc_attach.c | 89 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 61 insertions(+), 28 deletions(-)

diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
index 851a37a..6eeb324 100644
--- a/src/lxc/lxc_attach.c
+++ b/src/lxc/lxc_attach.c
@@ -130,6 +130,7 @@ int main(int argc, char *argv[])
 	void *cgroup_data = NULL;
 	uid_t uid;
 	char *curdir;
+	int mypipe[2];
 
 	ret = lxc_caps_init();
 	if (ret)
@@ -156,18 +157,6 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	if (!elevated_privileges) {
-	        /* we have to do this now since /sys/fs/cgroup may not
-	         * be available inside the container or we may not have
-	         * the required permissions anymore
-	         */
-		ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
-		if (ret < 0) {
-			ERROR("failed to prepare attaching to cgroup");
-			return -1;
-		}
-	}
-
 	curdir = getcwd(NULL, 0);
 
 	/* determine which namespaces the container was created with
@@ -183,6 +172,58 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	if (pipe(mypipe)) {
+		SYSERROR("failed creating communications pipe");
+		return -1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		SYSERROR("failed to fork\n");
+		return -1;
+	}
+	if (pid) {
+		int status;
+		int gchild;
+
+		close(mypipe[1]);
+		if (read(mypipe[0], &gchild, sizeof(gchild)) <= 0) {
+			ERROR("failed to get pid from grand-child");
+			return -1;
+		}
+
+		if (!elevated_privileges) {
+			ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
+			if (ret < 0) {
+				ERROR("failed to prepare attaching to cgroup");
+				return -1;
+			}
+
+			ret = lxc_cgroup_finish_attach(cgroup_data, gchild);
+			if (ret < 0) {
+				ERROR("failed to attach process to cgroup");
+				return -1;
+			}
+			lxc_cgroup_dispose_attach(cgroup_data);
+		}
+
+		close(mypipe[0]);
+	again1:
+		if (waitpid(pid, &status, 0) < 0) {
+			if (errno == EINTR)
+				goto again1;
+			SYSERROR("failed to wait '%d'", pid);
+			return -1;
+		}
+
+		if (WIFEXITED(status))
+			return WEXITSTATUS(status);
+
+		return -1;
+
+		return 0;
+	}
+
 	/* we need to attach before we fork since certain namespaces
 	 * (such as pid namespaces) only really affect children of the
 	 * current process and not the process itself
@@ -224,22 +265,13 @@ int main(int argc, char *argv[])
 		if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
 			return -1;
 
-		/* now that we are done with all privileged operations,
-		 * we can add ourselves to the cgroup. Since we smuggled in
-		 * the fds earlier, we still have write permission
-		 */
-		if (!elevated_privileges) {
-			/* since setns() for pid namespaces only really
-			 * affects child processes, the pid we have is
-			 * still valid outside the container, so this is
-			 * fine
-			 */
-			ret = lxc_cgroup_finish_attach(cgroup_data, pid);
-			if (ret < 0) {
-				ERROR("failed to attach process to cgroup");
-				return -1;
-			}
+		// ask parent to set cgroups for child
+		close(mypipe[0]);
+		if (write(mypipe[1], &pid, sizeof(pid)) != sizeof(pid)) {
+			ERROR("Error writing child's pid to parent");
+			return -1;
 		}
+		close(mypipe[1]);
 
 		/* tell the child we are done initializing */
 		if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
@@ -263,7 +295,8 @@ int main(int argc, char *argv[])
 
 	if (!pid) {
 		lxc_sync_fini_parent(handler);
-		lxc_cgroup_dispose_attach(cgroup_data);
+		close(mypipe[0]);
+		close(mypipe[1]);
 
 		/* A description of the purpose of this functionality is
 		 * provided in the lxc-attach(1) manual page. We have to
-- 
1.8.0





More information about the lxc-devel mailing list