[lxc-devel] [lxc/master] cgroups: handle older kernels (e.g. v4.9)

brauner on Github lxc-bot at linuxcontainers.org
Mon Mar 30 15:45:29 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1076 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200330/16089d01/attachment.bin>
-------------- next part --------------
From 22fda91eb24b84c74a9693168e66fae17ec15f52 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Mon, 30 Mar 2020 17:40:16 +0200
Subject: [PATCH] cgroups: handle older kernels (e.g. v4.9)

On older kernels the restrictions on moving processes between cgroups are
different than they are on newer kernels. Specifically, we're running into the
following check:

if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
    !uid_eq(cred->euid, tcred->uid) &&
    !uid_eq(cred->euid, tcred->suid))
        ret = -EACCES;

which dictates that in order to move a process into a cgroup one either needs
to be global root (no restrictions apply) or the effective uid of the process
performing the move and the uid or saved uid of the process that is supposed
to be moved need to be identical. The new attach logic we introduced didn't
fulfill this criterion because the check is not present on newer kernels.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/cgroups/cgfsng.c | 87 +++++++++++++++++++++++++++++++++++++---
 src/lxc/conf.c           | 13 ++++--
 src/lxc/conf.h           |  4 +-
 3 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 5c37c842e0..dd5bccc7d7 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -30,6 +30,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "af_unix.h"
 #include "caps.h"
 #include "cgroup.h"
 #include "cgroup2_devices.h"
@@ -2123,20 +2124,80 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t
 	return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
 }
 
+static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
+				     int unified_fd, int *sk_fd)
+{
+	__do_close int sk = *sk_fd, target_fd = -EBADF;
+	ssize_t ret;
+
+	/* Create leaf cgroup. */
+	ret = mkdirat(unified_fd, ".lxc", 0755);
+	if (ret < 0 && errno != EEXIST)
+		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
+
+	target_fd = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (target_fd < 0)
+		return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");
+
+	ret = lxc_abstract_unix_send_fds(sk, &target_fd, 1, NULL, 0);
+	if (ret <= 0)
+		return log_error_errno(-errno, errno, "Failed to send \".lxc/cgroup.procs\" fd %d", target_fd);
+
+	return log_debug(0, "Sent target cgroup fd %d", target_fd);
+}
+
+static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
+					int *sk_fd, pid_t pid)
+{
+	__do_close int sk = *sk_fd, target_fd = -EBADF;
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
+	ssize_t ret;
+
+	ret = lxc_abstract_unix_recv_fds(sk, &target_fd, 1, NULL, 0);
+	if (ret <= 0)
+		return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
+
+	pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+
+	ret = lxc_write_nointr(target_fd, pidstr, pidstr_len);
+	if (ret != pidstr_len && errno != EBUSY)
+		return log_error_errno(-1, errno, "Failed to move process into target cgroup");
+
+	return log_debug(0, "Moved process into target cgroup");
+}
+
 struct userns_exec_unified_attach_data {
 	const struct lxc_conf *conf;
 	int unified_fd;
+	int sk_pair[2];
 	pid_t pid;
 };
 
-static int cgroup_unified_attach_wrapper(void *data)
+static int cgroup_unified_attach_child_wrapper(void *data)
+{
+	struct userns_exec_unified_attach_data *args = data;
+
+	if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
+	    args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
+		return ret_errno(EINVAL);
+
+	close_prot_errno_disarm(args->sk_pair[0]);
+	return cgroup_attach_create_leaf(args->conf, args->unified_fd,
+					 &args->sk_pair[1]);
+}
+
+static int cgroup_unified_attach_parent_wrapper(void *data)
 {
 	struct userns_exec_unified_attach_data *args = data;
 
-	if (!args->conf || args->unified_fd < 0 || args->pid <= 0)
+	if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
+	    args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
 		return ret_errno(EINVAL);
 
-	return cgroup_attach_leaf(args->conf, args->unified_fd, args->pid);
+	close_prot_errno_disarm(args->sk_pair[1]);
+	return cgroup_attach_move_into_leaf(args->conf, &args->sk_pair[0],
+					    args->pid);
 }
 
 int cgroup_attach(const struct lxc_conf *conf, const char *name,
@@ -2159,7 +2220,15 @@ int cgroup_attach(const struct lxc_conf *conf, const char *name,
 			.pid		= pid,
 		};
 
-		ret = userns_exec_minimal(conf, cgroup_unified_attach_wrapper, &args);
+		ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+		if (ret < 0)
+			return -errno;
+
+		ret = userns_exec_minimal(conf,
+					  cgroup_unified_attach_parent_wrapper,
+					  &args,
+					  cgroup_unified_attach_child_wrapper,
+					  &args);
 	} else {
 		ret = cgroup_attach_leaf(conf, unified_fd, pid);
 	}
@@ -2213,7 +2282,15 @@ static int __cg_unified_attach(const struct hierarchy *h,
 			.pid		= pid,
 		};
 
-		ret = userns_exec_minimal(conf, cgroup_unified_attach_wrapper, &args);
+		ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+		if (ret < 0)
+			return -errno;
+
+		ret = userns_exec_minimal(conf,
+					  cgroup_unified_attach_parent_wrapper,
+					  &args,
+					  cgroup_unified_attach_child_wrapper,
+					  &args);
 	} else {
 		ret = cgroup_attach_leaf(conf, unified_fd, pid);
 	}
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index 9fc5c68a7a..2f6be9f263 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -4126,7 +4126,9 @@ int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data,
 	return ret;
 }
 
-int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *data)
+int userns_exec_minimal(const struct lxc_conf *conf,
+			int (*fn_parent)(void *), void *fn_parent_data,
+			int (*fn_child)(void *), void *fn_child_data)
 {
 	call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL;
 	uid_t resuid = LXC_INVALID_UID;
@@ -4136,7 +4138,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
 	pid_t pid;
 	int sock_fds[2];
 
-	if (!conf || !fn || !data)
+	if (!conf || !fn_child)
 		return ret_errno(EINVAL);
 
 	idmap = get_minimal_idmap(conf, &resuid, &resgid);
@@ -4189,7 +4191,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
 			_exit(EXIT_FAILURE);
 		}
 
-		ret = fn(data);
+		ret = fn_child(fn_child_data);
 		if (ret) {
 			SYSERROR("Running function in new user namespace failed");
 			_exit(EXIT_FAILURE);
@@ -4232,6 +4234,11 @@ int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *da
 		goto on_error;
 	}
 
+	if (fn_parent && fn_parent(fn_parent_data)) {
+		SYSERROR("Running parent function failed");
+		_exit(EXIT_FAILURE);
+	}
+
 on_error:
 	close_prot_errno_disarm(sock_fds[0]);
 	close_prot_errno_disarm(sock_fds[1]);
diff --git a/src/lxc/conf.h b/src/lxc/conf.h
index 2bd2a203a6..64885c35ea 100644
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -467,6 +467,8 @@ extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
 extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
 extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
 extern int lxc_clear_namespace(struct lxc_conf *c);
-extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn)(void *), void *data);
+extern int userns_exec_minimal(const struct lxc_conf *conf,
+			       int (*fn_parent)(void *), void *fn_parent_data,
+			       int (*fn_child)(void *), void *fn_child_data);
 
 #endif /* __LXC_CONF_H */


More information about the lxc-devel mailing list