[lxc-devel] [lxc/master] cgroups: improve container cgroup attaching

Wed Dec 4 12:35:27 UTC 2019

A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 925 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20191204/16311344/attachment.bin>
-------------- next part --------------
From 23a917e5d242698607edc4cf5dd270bb744e843d Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 4 Dec 2019 13:26:23 +0100
Subject: [PATCH 1/2] commands: use logging return helpers

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/commands.c | 87 +++++++++++++++++++++-------------------------
 src/lxc/log.h      | 13 +++++++
 2 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/src/lxc/commands.c b/src/lxc/commands.c
index f397d3c61d..ccf8cf8905 100644
--- a/src/lxc/commands.c
+++ b/src/lxc/commands.c
@@ -131,19 +131,15 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd)
  */
 static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
 {
-	int ret, rspfd;
+	__do_close_prot_errno int fd_rsp = -EBADF;
+	int ret;
 	struct lxc_cmd_rsp *rsp = &cmd->rsp;
 
-	ret = lxc_abstract_unix_recv_fds(sock, &rspfd, 1, rsp, sizeof(*rsp));
-	if (ret < 0) {
-		SYSWARN("Failed to receive response for command \"%s\"",
-		        lxc_cmd_str(cmd->req.cmd));
-
-		if (errno == ECONNRESET)
-			return -1;
-
-		return -1;
-	}
+	ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp));
+	if (ret < 0)
+		return log_warn_errno(-1,
+				      errno, "Failed to receive response for command \"%s\"",
+				      lxc_cmd_str(cmd->req.cmd));
 	TRACE("Command \"%s\" received response", lxc_cmd_str(cmd->req.cmd));
 
 	if (cmd->req.cmd == LXC_CMD_CONSOLE) {
@@ -156,33 +152,31 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
 			return 0;
 
 		rspdata = malloc(sizeof(*rspdata));
-		if (!rspdata) {
-			errno = ENOMEM;
-			ERROR("Failed to allocate response buffer for command \"%s\"",
-			      lxc_cmd_str(cmd->req.cmd));
-			return -1;
-		}
+		if (!rspdata)
+			return log_warn_errno(-1,
+					      ENOMEM, "Failed to receive response for command \"%s\"",
+					      lxc_cmd_str(cmd->req.cmd));
 
-		rspdata->masterfd = rspfd;
+		rspdata->masterfd = move_fd(fd_rsp);
 		rspdata->ttynum = PTR_TO_INT(rsp->data);
 		rsp->data = rspdata;
 	}
 
-	if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD)
-		rsp->data = INT_TO_PTR(rspfd);
-
-	if (rsp->datalen == 0) {
-		DEBUG("Response data length for command \"%s\" is 0",
-		      lxc_cmd_str(cmd->req.cmd));
-		return ret;
+	if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) {
+		int cgroup2_fd = move_fd(fd_rsp);
+		rsp->data = INT_TO_PTR(cgroup2_fd);
 	}
 
+	if (rsp->datalen == 0)
+		return log_debug(ret,
+				 "Response data length for command \"%s\" is 0",
+				 lxc_cmd_str(cmd->req.cmd));
+
 	if ((rsp->datalen > LXC_CMD_DATA_MAX) &&
-	    (cmd->req.cmd != LXC_CMD_CONSOLE_LOG)) {
-		ERROR("Response data for command \"%s\" is too long: %d bytes > %d",
-		      lxc_cmd_str(cmd->req.cmd), rsp->datalen, LXC_CMD_DATA_MAX);
-		return -1;
-	}
+	    (cmd->req.cmd != LXC_CMD_CONSOLE_LOG))
+		return log_error(-1, "Response data for command \"%s\" is too long: %d bytes > %d",
+				 lxc_cmd_str(cmd->req.cmd), rsp->datalen,
+				 LXC_CMD_DATA_MAX);
 
 	if (cmd->req.cmd == LXC_CMD_CONSOLE_LOG) {
 		rsp->data = malloc(rsp->datalen + 1);
@@ -190,19 +184,16 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd)
 	} else {
 		rsp->data = malloc(rsp->datalen);
 	}
-	if (!rsp->data) {
-		errno = ENOMEM;
-		ERROR("Failed to allocate response buffer for command \"%s\"",
-		      lxc_cmd_str(cmd->req.cmd));
-		return -1;
-	}
+	if (!rsp->data)
+		return log_error_errno(-1,
+				       ENOMEM, "Failed to allocate response buffer for command \"%s\"",
+				       lxc_cmd_str(cmd->req.cmd));
 
 	ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0);
-	if (ret != rsp->datalen) {
-		SYSERROR("Failed to receive response data for command \"%s\"",
-		         lxc_cmd_str(cmd->req.cmd));
-		return -1;
-	}
+	if (ret != rsp->datalen)
+		return log_error_errno(-1,
+				       errno, "Failed to receive response data for command \"%s\"",
+				       lxc_cmd_str(cmd->req.cmd));
 
 	return ret;
 }
@@ -1305,8 +1296,11 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath)
 	};
 
 	ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
-	if (ret <= 0 || cmd.rsp.ret < 0)
-		return error_log_errno(errno, "Failed to retrieve cgroup2 fd");
+	if (ret < 0)
+		return -1;
+
+	if (cmd.rsp.ret < 0)
+		return error_log_errno(errno, "Failed to receive cgroup2 fd");
 
 	return PTR_TO_INT(cmd.rsp.data);
 }
@@ -1361,10 +1355,9 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
 		[LXC_CMD_GET_CGROUP2_FD]		= lxc_cmd_get_cgroup2_fd_callback,
 	};
 
-	if (req->cmd >= LXC_CMD_MAX) {
-		ERROR("Undefined command id %d", req->cmd);
-		return -1;
-	}
+	if (req->cmd >= LXC_CMD_MAX)
+		return log_error_errno(-1, ENOENT, "Undefined command id %d", req->cmd);
+
 	return cb[req->cmd](fd, req, handler, descr);
 }
 
diff --git a/src/lxc/log.h b/src/lxc/log.h
index 61de06b5a8..58c98d1e02 100644
--- a/src/lxc/log.h
+++ b/src/lxc/log.h
@@ -531,6 +531,19 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo,	\
 		__ret__;		      \
 	})
 
+#define log_warn_errno(__ret__, __errno__, format, ...) \
+	({						\
+		errno = __errno__;			\
+		SYSWARN(format, ##__VA_ARGS__);		\
+		__ret__;				\
+	})
+
+#define log_debug(__ret__, format, ...)	      \
+	({				      \
+		DEBUG(format, ##__VA_ARGS__); \
+		__ret__;		      \
+	})
+
 extern int lxc_log_fd;
 
 extern int lxc_log_syslog(int facility);

From 05ffdb16f44199da48f8f50724f859d7c40a5e90 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 4 Dec 2019 13:26:54 +0100
Subject: [PATCH 2/2] cgroups: improve container cgroup attaching

The current attach.c codepath which handles moving the attaching process into
the container's cgroups allocates a whole new struct cgroup_ops and goes
through the trouble of reparsing the whole cgroup layout.
That's costly and wasteful. My plan has always been to move this into the
command api by getting fds for attaching back but but it's not worth going
through that hazzle for non-unified hosts. On pure unified hosts however -
being the future - we can just attach through a single fd so there's no need to
allocate and setup struct cgroup_ops.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/lxc/attach.c         | 21 +++++++----
 src/lxc/cgroups/cgfsng.c | 80 +++++++++++++++++++++++++---------------
 src/lxc/cgroups/cgroup.h |  2 +
 src/lxc/macro.h          |  6 +++
 4 files changed, 72 insertions(+), 37 deletions(-)

diff --git a/src/lxc/attach.c b/src/lxc/attach.c
index 80c41fe263..8f5f8424dc 100644
--- a/src/lxc/attach.c
+++ b/src/lxc/attach.c
@@ -1230,16 +1230,21 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
 
 		/* Attach to cgroup, if requested. */
 		if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
-			struct cgroup_ops *cgroup_ops;
+			/*
+			 * If this is the unified hierarchy cgroup_attach() is
+			 * enough.
+			 */
+			ret = cgroup_attach(name, lxcpath, pid);
+			if (ret) {
+				__do_cgroup_exit struct cgroup_ops *cgroup_ops = NULL;
 
-			cgroup_ops = cgroup_init(conf);
-			if (!cgroup_ops)
-				goto on_error;
+				cgroup_ops = cgroup_init(conf);
+				if (!cgroup_ops)
+					goto on_error;
 
-			if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
-				goto on_error;
-
-			cgroup_exit(cgroup_ops);
+				if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
+					goto on_error;
+			}
 			TRACE("Moved intermediate process %d into container's cgroups", pid);
 		}
 
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index d2e1ecf6f5..5ee6105c1c 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2202,38 +2202,14 @@ static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
 	return must_make_path(h->mountpoint, inpath, filename, NULL);
 }
 
-/* Technically, we're always at a delegation boundary here (This is especially
- * true when cgroup namespaces are available.). The reasoning is that in order
- * for us to have been able to start a container in the first place the root
- * cgroup must have been a leaf node. Now, either the container's init system
- * has populated the cgroup and kept it as a leaf node or it has created
- * subtrees. In the former case we will simply attach to the leaf node we
- * created when we started the container in the latter case we create our own
- * cgroup for the attaching process.
- */
-static int __cg_unified_attach(const struct hierarchy *h, const char *name,
-			       const char *lxcpath, const char *pidstr,
-			       size_t pidstr_len, const char *controller)
+static int cgroup_attach_leaf(int unified_fd, int64_t pid)
 {
-	__do_close_prot_errno int unified_fd = -EBADF;
 	int idx = 0;
 	int ret;
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
 
-	unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
-	if (unified_fd < 0) {
-		__do_free char *base_path = NULL, *container_cgroup = NULL;
-
-		container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
-		/* not running */
-		if (!container_cgroup)
-			return 0;
-
-		base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
-		unified_fd = open(base_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-	}
-	if (unified_fd < 0)
-		return -1;
-
+	pidstr_len = sprintf(pidstr, INT64_FMT, pid);
 	ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len);
 	if (ret == 0)
 		return 0;
@@ -2275,6 +2251,52 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
 	return -1;
 }
 
+int cgroup_attach(const char *name, const char *lxcpath, int64_t pid)
+{
+	__do_close_prot_errno int unified_fd = -EBADF;
+
+	unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+	if (unified_fd < 0)
+		return -1;
+
+	return cgroup_attach_leaf(unified_fd, pid);
+}
+
+/* Technically, we're always at a delegation boundary here (This is especially
+ * true when cgroup namespaces are available.). The reasoning is that in order
+ * for us to have been able to start a container in the first place the root
+ * cgroup must have been a leaf node. Now, either the container's init system
+ * has populated the cgroup and kept it as a leaf node or it has created
+ * subtrees. In the former case we will simply attach to the leaf node we
+ * created when we started the container in the latter case we create our own
+ * cgroup for the attaching process.
+ */
+static int __cg_unified_attach(const struct hierarchy *h, const char *name,
+			       const char *lxcpath, pid_t pid,
+			       const char *controller)
+{
+	__do_close_prot_errno int unified_fd = -EBADF;
+	int idx = 0;
+	int ret;
+
+	ret = cgroup_attach(name, lxcpath, pid);
+	if (ret < 0) {
+		__do_free char *path = NULL, *cgroup = NULL;
+
+		cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+		/* not running */
+		if (!cgroup)
+			return 0;
+
+		path = must_make_path(h->mountpoint, cgroup, NULL);
+		unified_fd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+	}
+	if (unified_fd < 0)
+		return -1;
+
+	return cgroup_attach_leaf(unified_fd, pid);
+}
+
 __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
 					 const char *lxcpath, pid_t pid)
 {
@@ -2293,7 +2315,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
 		struct hierarchy *h = ops->hierarchies[i];
 
 		if (h->version == CGROUP2_SUPER_MAGIC) {
-			ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
+			ret = __cg_unified_attach(h, name, lxcpath, pid,
 						  h->controllers[0]);
 			if (ret < 0)
 				return false;
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 91519fbe9c..f88c9ca415 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -192,6 +192,8 @@ static inline void __auto_cgroup_exit__(struct cgroup_ops **ops)
 		cgroup_exit(*ops);
 }
 
+extern int cgroup_attach(const char *name, const char *lxcpath, int64_t pid);
+
 #define __do_cgroup_exit __attribute__((__cleanup__(__auto_cgroup_exit__)))
 
 #endif
diff --git a/src/lxc/macro.h b/src/lxc/macro.h
index d9d91cd527..ed58bb1c68 100644
--- a/src/lxc/macro.h
+++ b/src/lxc/macro.h
@@ -21,6 +21,10 @@
 #ifndef __LXC_MACRO_H
 #define __LXC_MACRO_H
 
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#define __STDC_FORMAT_MACROS
 #include <asm/types.h>
 #include <limits.h>
 #include <linux/if_link.h>
@@ -39,6 +43,8 @@
 #define PATH_MAX 4096
 #endif
 
+#define INT64_FMT "%" PRId64
+
 /* Define __S_ISTYPE if missing from the C library. */
 #ifndef __S_ISTYPE
 #define __S_ISTYPE(mode, mask) (((mode)&S_IFMT) == (mask))