[lxc-devel] [lxcfs/master] cgroups: more cgroup2 fun

brauner on Github lxc-bot at linuxcontainers.org
Fri Apr 17 10:22:17 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1047 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200417/ff5a671e/attachment-0001.bin>
-------------- next part --------------
From d5287ee634c5d004c47eba8bfadf3cd0125eb17f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 17 Apr 2020 12:20:55 +0200
Subject: [PATCH] cgroups: more cgroup2 fun

Try too read a valid value from a given cgroup file. If it is a legacy
cgroup hierarchy and we fail to find a valid value we terminate early
and report an error.
The cgroup2 hierarchy however, has different semantics. In a few
controller files it will show the value "max" or simply leave it
completely empty thereby indicating that no limit has been set for this
particular cgroup.  However, that doesn't mean that there's no limit. A
cgroup further up the hierarchy could have a limit set that also applies
to the cgroup we are interested in. So for the unified cgroup hierarchy
we need to keep walking towards the cgroup2 root cgroup and try to parse
a valid value.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 src/cgroups/cgfsng.c       |  28 ++++++----
 src/cgroups/cgroup.c       |   3 ++
 src/cgroups/cgroup.h       |   6 +++
 src/cgroups/cgroup_utils.c | 106 +++++++++++++++++++++++++++++++++++++
 src/cgroups/cgroup_utils.h |   9 ++++
 5 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c
index 8d3c53d..e795f5a 100644
--- a/src/cgroups/cgfsng.c
+++ b/src/cgroups/cgfsng.c
@@ -545,9 +545,8 @@ static bool cgfsng_get(struct cgroup_ops *ops, const char *controller,
 static int cgfsng_get_memory(struct cgroup_ops *ops, const char *cgroup,
 			     const char *file, char **value)
 {
-	__do_free char *path = NULL;
 	struct hierarchy *h;
-	int ret;
+	int cgroup2_root_fd, layout, ret;
 
 	h = ops->get_hierarchy(ops, "memory");
 	if (!h)
@@ -562,17 +561,18 @@ static int cgfsng_get_memory(struct cgroup_ops *ops, const char *cgroup,
 			file = "memory.memsw.usage_in_bytes";
 		else if (strcmp(file, "memory.current") == 0)
 			file = "memory.usage_in_bytes";
-		ret = CGROUP_SUPER_MAGIC;
+		layout = CGROUP_SUPER_MAGIC;
+		cgroup2_root_fd = -EBADF;
 	} else {
-		ret = CGROUP2_SUPER_MAGIC;
+		layout = CGROUP2_SUPER_MAGIC;
+		cgroup2_root_fd = ops->cgroup2_root_fd;
 	}
 
-	path = must_make_path_relative(cgroup, file, NULL);
-	*value = readat_file(h->fd, path);
-	if (!*value)
-		ret = -1;
+	ret = cgroup_walkup_to_root(cgroup2_root_fd, h->fd, cgroup, file, value);
+	if (ret < 0)
+		return ret;
 
-	return ret;
+	return layout;
 }
 
 static int cgfsng_get_memory_stats_fd(struct cgroup_ops *ops, const char *cgroup)
@@ -918,6 +918,11 @@ static int cg_unified_init(struct cgroup_ops *ops)
 
 	ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
 	ops->unified = new;
+
+	ops->cgroup2_root_fd = open(DEFAULT_CGROUP_MOUNTPOINT, O_DIRECTORY | O_PATH | O_CLOEXEC);
+	if (ops->cgroup2_root_fd < 0)
+		return -errno;
+
 	return CGROUP2_SUPER_MAGIC;
 }
 
@@ -939,12 +944,13 @@ struct cgroup_ops *cgfsng_ops_init(void)
 {
 	__do_free struct cgroup_ops *cgfsng_ops = NULL;
 
-	cgfsng_ops = malloc(sizeof(struct cgroup_ops));
+	cgfsng_ops = zalloc(sizeof(struct cgroup_ops));
 	if (!cgfsng_ops)
 		return ret_set_errno(NULL, ENOMEM);
 
-	memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
 	cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+	cgfsng_ops->mntns_fd = -EBADF;
+	cgfsng_ops->cgroup2_root_fd = -EBADF;
 
 	if (cg_init(cgfsng_ops))
 		return NULL;
diff --git a/src/cgroups/cgroup.c b/src/cgroups/cgroup.c
index 342c86e..fee04c8 100644
--- a/src/cgroups/cgroup.c
+++ b/src/cgroups/cgroup.c
@@ -72,6 +72,9 @@ void cgroup_exit(struct cgroup_ops *ops)
 	if (ops->mntns_fd >= 0)
 		close(ops->mntns_fd);
 
+	if (ops->cgroup2_root_fd >= 0)
+		close(ops->cgroup2_root_fd);
+
 	free(ops->hierarchies);
 
 	free(ops);
diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h
index dfcee0e..e8327fd 100644
--- a/src/cgroups/cgroup.h
+++ b/src/cgroups/cgroup.h
@@ -91,6 +91,12 @@ struct cgroup_ops {
 	 */
 	int mntns_fd;
 
+	/*
+	 * A file descriptor to the root of the cgroup tree if we're on a
+	 * cgroup2 only system.
+	 */
+	int cgroup2_root_fd;
+
 	/* string constant */
 	const char *driver;
 
diff --git a/src/cgroups/cgroup_utils.c b/src/cgroups/cgroup_utils.c
index 09bb0e6..c9e5ce8 100644
--- a/src/cgroups/cgroup_utils.c
+++ b/src/cgroups/cgroup_utils.c
@@ -724,3 +724,109 @@ bool mkdir_p(const char *dir, mode_t mode)
 
 	return true;
 }
+
+static bool same_file(int fd1, int fd2)
+{
+	struct stat st1, st2;
+
+	if (fstat(fd1, &st1) < 0 || fstat(fd2, &st2) < 0)
+		return false;
+
+	return (st1.st_dev == st2.st_dev) && (st1.st_ino == st2.st_ino);
+}
+
+/**
+ * cgroup_walkup_to_root() - Walk upwards to cgroup root to find valid value
+ *
+ * @cgroup2_root_fd:	File descriptor for the cgroup2 root mount point.
+ * @hierarchy_fd:	File descriptor for the hierarchy.
+ * @cgroup:		A cgroup directory relative to @hierarchy_fd.
+ * @file:		The file in @cgroup from which to read a value.
+ * @value:		Return argument to store value read from @file.
+ *
+ * This function tries to read a valid value from @file in @cgroup in
+ * @hierarchy_fd. If it is a legacy cgroup hierarchy and we fail to find a
+ * valid value we terminate early and report an error.
+ * The cgroup2 hierarchy however, has different semantics. In a few controller
+ * files it will show the value "max" or simply leave it completely empty
+ * thereby indicating that no limit has been set for this particular cgroup.
+ * However, that doesn't mean that there's no limit. A cgroup further up the
+ * hierarchy could have a limit set that also applies to the cgroup we are
+ * interested in. So for the unified cgroup hierarchy we need to keep walking
+ * towards the cgroup2 root cgroup and try to parse a valid value.
+ */
+int cgroup_walkup_to_root(int cgroup2_root_fd, int hierarchy_fd,
+			  const char *cgroup, const char *file, char **value)
+{
+	__do_close int dir_fd = -EBADF;
+	__do_free char *val = NULL;
+	bool no_limit = false;
+
+	/* Look in our current cgroup for a valid value. */
+	dir_fd = openat(hierarchy_fd, cgroup, O_DIRECTORY | O_PATH | O_CLOEXEC);
+	if (dir_fd < 0)
+		return -errno;
+
+	val = readat_file(dir_fd, file);
+	if (is_empty_string(val) || strcmp(val, "max") == 0) {
+		no_limit = true;
+	} else if (!is_empty_string(val)) {
+		*value = move_ptr(val);
+		return 0;
+	}
+
+	/*
+	 * Legacy cgroup hierarchies should always show a valid value in the
+	 * file of the cgroup. So no need to do this upwards walking crap.
+	 */
+	if (cgroup2_root_fd < 0)
+		return -EINVAL;
+
+	free_disarm(val);
+	for (int i = 0;; i++) {
+		__do_close int cur_fd = -EBADF, new_dir_fd = -EBADF;
+		__do_free char *new_val = NULL;
+		int swap_fd;
+
+		new_dir_fd = move_fd(dir_fd);
+		cur_fd = openat(new_dir_fd, "../", O_DIRECTORY | O_PATH | O_CLOEXEC);
+		if (cur_fd < 0)
+			return -errno;
+
+		/*
+		 * We're at the root of the cgroup2 tree so stop walking
+		 * upwards.
+		 */
+		if (same_file(cgroup2_root_fd, cur_fd)) {
+			if (no_limit)
+				return 1;
+
+			return -EINVAL;
+		}
+
+		/* We found a valid value. Terminate walk. */
+		new_val = readat_file(new_dir_fd, file);
+		if (is_empty_string(new_val) || strcmp(new_val, "max") == 0) {
+			no_limit = true;
+		} else if (!is_empty_string(new_val)) {
+			*value = move_ptr(new_val);
+			return 0;
+		}
+
+		/*
+		 * Set an arbitraty hard-coded limit to prevent us from ending
+		 * up in an endless loop. There really shouldn't be any cgroup
+		 * tree that is 1000 levels deep. That would be insane in
+		 * principal and performance-wise.
+		 */
+		if (i == 1000)
+			return log_error_errno(-ELOOP, ELOOP, "To many nested cgroups or invalid mount tree. Terminating walk");
+
+		swap_fd = move_fd(cur_fd);
+		cur_fd = move_fd(new_dir_fd);
+		new_dir_fd = swap_fd;
+	}
+
+	/* Not reached. */
+	return -EINVAL;
+}
diff --git a/src/cgroups/cgroup_utils.h b/src/cgroups/cgroup_utils.h
index 094f2c2..d1f5639 100644
--- a/src/cgroups/cgroup_utils.h
+++ b/src/cgroups/cgroup_utils.h
@@ -95,6 +95,10 @@ static inline int openat_safe(int fd, const char *path)
 	return openat(fd, path, O_DIRECTORY | O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
 }
 
+extern int cgroup_walkup_to_root(int cgroup2_root_fd, int hierarchy_fd,
+				 const char *cgroup, const char *file,
+				 char **value);
+
 #define must_make_path_relative(__first__, ...)                                \
 	({                                                                     \
 		char *__ptr__;                                                 \
@@ -105,4 +109,9 @@ static inline int openat_safe(int fd, const char *path)
 		__ptr__;                                                       \
 	})
 
+static inline bool is_empty_string(const char *s)
+{
+	return !s || strcmp(s, "") == 0;
+}
+
 #endif /* __LXC_CGROUP_UTILS_H */


More information about the lxc-devel mailing list