[lxc-devel] [PATCH 1/1] lxc_attach: fix break with user namespaces (v2)

Serge Hallyn serge.hallyn at canonical.com
Mon Jan 21 21:55:54 UTC 2013


When you clone a new user_ns, the child cannot write to the fds
opened by the parent.  Handle this by doing an extra fork.  The
grandparent hangs around and waits for its child to tell it the
pid of the grandchild, which will be the one attached to the
container.  The grandparent then moves the grandchild into the
right cgroup, then waits for the child who in turn is waiting on
the grandchild to complete.

Secondly, when attaching to a new user namespace, your old uid is
not valid, so you are uid -1.  This patch simply does setgid+setuid
to 0 if that is the case.  We probably want to be smarter, but
for now this allows lxc-attach to work.

TODO:  It will also need to be entered into the apparmor or selinux
domain of the child to prevent it being used by a task in the container
as a stepping stone to greater privilege (i.e. through ptrace).

Changelog: v2: fix duplicate free of cgroup data, and add the
  setuid when entering a new userns.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/lxc_attach.c | 101 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 28 deletions(-)

diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c
index 851a37a..8d80a8a 100644
--- a/src/lxc/lxc_attach.c
+++ b/src/lxc/lxc_attach.c
@@ -130,6 +130,7 @@ int main(int argc, char *argv[])
 	void *cgroup_data = NULL;
 	uid_t uid;
 	char *curdir;
+	int mypipe[2];
 
 	ret = lxc_caps_init();
 	if (ret)
@@ -156,18 +157,6 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	if (!elevated_privileges) {
-	        /* we have to do this now since /sys/fs/cgroup may not
-	         * be available inside the container or we may not have
-	         * the required permissions anymore
-	         */
-		ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
-		if (ret < 0) {
-			ERROR("failed to prepare attaching to cgroup");
-			return -1;
-		}
-	}
-
 	curdir = getcwd(NULL, 0);
 
 	/* determine which namespaces the container was created with
@@ -183,6 +172,57 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	if (pipe(mypipe)) {
+		SYSERROR("failed creating communications pipe");
+		return -1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		SYSERROR("failed to fork\n");
+		return -1;
+	}
+	if (pid) {
+		int status;
+		int gchild;
+
+		close(mypipe[1]);
+		if (read(mypipe[0], &gchild, sizeof(gchild)) <= 0) {
+			ERROR("failed to get pid from grand-child");
+			return -1;
+		}
+
+		if (!elevated_privileges) {
+			ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
+			if (ret < 0) {
+				ERROR("failed to prepare attaching to cgroup");
+				return -1;
+			}
+
+			ret = lxc_cgroup_finish_attach(cgroup_data, gchild);
+			if (ret < 0) {
+				ERROR("failed to attach process to cgroup");
+				return -1;
+			}
+		}
+
+		close(mypipe[0]);
+	again1:
+		if (waitpid(pid, &status, 0) < 0) {
+			if (errno == EINTR)
+				goto again1;
+			SYSERROR("failed to wait '%d'", pid);
+			return -1;
+		}
+
+		if (WIFEXITED(status))
+			return WEXITSTATUS(status);
+
+		return -1;
+
+		return 0;
+	}
+
 	/* we need to attach before we fork since certain namespaces
 	 * (such as pid namespaces) only really affect children of the
 	 * current process and not the process itself
@@ -224,22 +264,13 @@ int main(int argc, char *argv[])
 		if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
 			return -1;
 
-		/* now that we are done with all privileged operations,
-		 * we can add ourselves to the cgroup. Since we smuggled in
-		 * the fds earlier, we still have write permission
-		 */
-		if (!elevated_privileges) {
-			/* since setns() for pid namespaces only really
-			 * affects child processes, the pid we have is
-			 * still valid outside the container, so this is
-			 * fine
-			 */
-			ret = lxc_cgroup_finish_attach(cgroup_data, pid);
-			if (ret < 0) {
-				ERROR("failed to attach process to cgroup");
-				return -1;
-			}
+		// ask parent to set cgroups for child
+		close(mypipe[0]);
+		if (write(mypipe[1], &pid, sizeof(pid)) != sizeof(pid)) {
+			ERROR("Error writing child's pid to parent");
+			return -1;
 		}
+		close(mypipe[1]);
 
 		/* tell the child we are done initializing */
 		if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
@@ -263,7 +294,8 @@ int main(int argc, char *argv[])
 
 	if (!pid) {
 		lxc_sync_fini_parent(handler);
-		lxc_cgroup_dispose_attach(cgroup_data);
+		close(mypipe[0]);
+		close(mypipe[1]);
 
 		/* A description of the purpose of this functionality is
 		 * provided in the lxc-attach(1) manual page. We have to
@@ -307,6 +339,19 @@ int main(int argc, char *argv[])
 			return -1;
 		}
 
+		if (namespace_flags & CLONE_NEWUSER) {
+			/* XXX FIXME this should get the uid of the container init and setuid to that */
+			/* XXX FIXME or perhaps try to map in the lxc-attach caller's uid? */
+			if (setgid(0)) {
+				SYSERROR("switching to container gid");
+				return -1;
+			}
+			if (setuid(0)) {
+				SYSERROR("switching to container uid");
+				return -1;
+			}
+		}
+
 		uid = getuid();
 
 		passwd = getpwuid(uid);
-- 
1.8.0





More information about the lxc-devel mailing list