[lxc-devel] [PATCH] cgroupfs: cpuset support for kernels without cgroup.clone_children

Serge Hallyn serge.hallyn at ubuntu.com
Wed Jan 29 10:01:06 UTC 2014


Quoting Robert Vogelgesang (vogel at users.sourceforge.net):
> Hi Serge,
> 
> On Tue, Jan 28, 2014 at 04:32:30PM +0000, Serge Hallyn wrote:
> > Quoting Robert Vogelgesang (vogel at users.sourceforge.net):
> > > Hi,
> > > 
> > > as promised last week, here's my patch for cpuset cgroup support for
> > > kernels without the cgroup.clone_children feature.
> > 
> > Thanks.
> > 
> > > My initial patch used "#include <linux/version.h>" and the macros defined
> > > there to decide if cgroup.clone_children should be used or not.  After
> > > having seen Serge Hallyn's patch which he posted to the list last Wednesday,
> > > where he used stat() to check if the cgroup.clone_children file is there,
> > > I rewrote my patch to do the same.
> > > 
> > > The patch is against 1.0.0.beta3, and it is tested successfully with
> > > RHEL-6's kernel version 2.6.32-431.3.1.el6, compiled without cgmanager
> > 
> > Shouldn't be a problem as the cgfs_create function won't be called if
> > cgmanager is running.
> > 
> > > (I've so far not tried to use cgmanager in RHEL-6).
> > > 
> > > In addition to fixing the cpuset cgroup setup, this patch also fixes a
> > > wrong argument in a call to handle_cgroup_settings() in the same context.
> > > 
> > > 	Robert
> > > 
> > > Signed-off-by: Robert Vogelgesang <vogel at users.sourceforge.net>
> > 
> > Just a two questions.  I'll go ahead and run tests here while waiting
> > for an answer.  If it comes down to "add the free and use 1024", I can
> > just do that inline.
> 
> Yes, thank you, "free(childfile);" should be added where you noted it.
> 
> Regarding the size of the "value" buffer: 128 is what I used in my
> tests; it should be enough for servers with up to around 80 CPUs;
> 1024 would support at least around 540 CPUs.  I don't know where to
> set the limit.
> 
> Maybe we should modify the patch so that init_cpuset_if_needed()
> checks if cgroup.clone_children is available before calling
> do_init_cpuset_file().  Under the assumption that the really large
> machines are running newer kernels, this buffer would then never be used.
> 
> Alternatively we could change the initial check for non-empty cpuset.cpus
> and cpuset.mems files to not treat an insufficient buffer size as
> an error.  This, too, would make the buffer size irrelevant on kernels
> that support cgroup.clone_children.
> 
> Let me know if I should do one or the other.

Ok I've gone ahead and done both.  My change ended up more invasive than
I had planned, but it tests ok here and should do the right thing.  I've
gone ahead and pushed the below:

>From 934b1673cda3f4fc17d66cfe3f429d7fafe3f4a6 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge.hallyn at ubuntu.com>
Date: Wed, 29 Jan 2014 09:40:39 +0000
Subject: [PATCH 2/4] cgroups: adjust previous commit

Remove a memory leak on error path.

Only try to initialize cpuset if cgroup.clonechildren does not exist.

Bump the max value we read from cpuset.{cpus,mems} to 1024.

If cpuset.cpus or .mems is already initialized but is too long, don't fail.

If parent's cpuset.cpus or .mems is too long, record an error and fail.
If anyone actually runs into this, we can simply allocate the required
length as needed, but we don't expect anyone to run into this.

Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 src/lxc/cgroup.c | 73 +++++++++++++++++++++++++++++++++-----------------------
 src/lxc/cgroup.h |  1 +
 2 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
index 87d1bb7..fd60155 100644
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -2026,6 +2026,8 @@ static int handle_cgroup_settings(struct cgroup_mount_point *mp,
 	int r, saved_errno = 0;
 	char buf[2];
 
+	mp->need_cpuset_init = false;
+
 	/* If this is the memory cgroup, we want to enforce hierarchy.
 	 * But don't fail if for some reason we can't.
 	 */
@@ -2058,6 +2060,7 @@ static int handle_cgroup_settings(struct cgroup_mount_point *mp,
 		 * was created
 		 */
 		if (stat(cc_path, &sb) != 0 && errno == ENOENT) {
+			mp->need_cpuset_init = true;
 			free(cc_path);
 			return 0;
 		}
@@ -2075,75 +2078,85 @@ static int handle_cgroup_settings(struct cgroup_mount_point *mp,
 	return 0;
 }
 
-static bool cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
+static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
 {
 	int ret = lxc_read_from_file(fn, buf, bufsize);
 	if (ret < 0) {
 		SYSERROR("failed to read %s", fn);
-		return false;
+		return ret;
 	}
 	if (ret == bufsize) {
-		ERROR("too much data in %s", fn);
-		return false;
+		if (bufsize > 0) {
+			/* obviously this wasn't empty */
+			buf[bufsize-1] = '\0';
+			return ret;
+		}
+		/* Callers don't do this, but regression/sanity check */
+		ERROR("%s: was not expecting 0 bufsize", __func__);
+		return -1;
 	}
 	buf[ret] = '\0';
-	return true;
+	return ret;
 }
 
 static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
 				const char *path, const char *name)
 {
-	char value[128];
-	char *childfile, *parentfile, *tmp;
-	bool ok;
+	char value[1024];
+	char *childfile, *parentfile = NULL, *tmp;
+	int ret;
+	bool ok = false;
+
+	if (!mp->need_cpuset_init)
+		return true;
 
 	childfile = cgroup_to_absolute_path(mp, path, name);
 	if (!childfile)
 		return false;
 
 	/* don't overwrite a non-empty value in the file */
-	if (!cgroup_read_from_file(childfile, value, sizeof(value))) {
-		free(childfile);
-		return false;
-	}
+	ret = cgroup_read_from_file(childfile, value, sizeof(value));
+	if (ret < 0)
+		goto out;
 	if (value[0] != '\0' && value[0] != '\n') {
-		free(childfile);
-		return true;
+		ok = true;
+		goto out;
 	}
 
 	/* path to the same name in the parent cgroup */
 	parentfile = strdup(path);
 	if (!parentfile)
-		return false;
+		goto out;
+
 	tmp = strrchr(parentfile, '/');
-	if (!tmp) {
-		free(childfile);
-		free(parentfile);
-		return false;
-	}
+	if (!tmp)
+		goto out;
 	if (tmp == parentfile)
 		tmp++; /* keep the '/' at the start */
 	*tmp = '\0';
 	tmp = parentfile;
 	parentfile = cgroup_to_absolute_path(mp, tmp, name);
 	free(tmp);
-	if (!parentfile) {
-		free(childfile);
-		return false;
-	}
+	if (!parentfile)
+		goto out;
 
 	/* copy from parent to child cgroup */
-	if (!cgroup_read_from_file(parentfile, value, sizeof(value))) {
-		free(parentfile);
-		free(childfile);
-		return false;
+	ret = cgroup_read_from_file(parentfile, value, sizeof(value));
+	if (ret < 0)
+		goto out;
+	if (ret == sizeof(value)) {
+		/* If anyone actually sees this error, we can address it */
+		ERROR("parent cpuset value too long");
+		goto out;
 	}
 	ok = (lxc_write_to_file(childfile, value, strlen(value), false) >= 0);
+
+out:
 	if (!ok)
 		SYSERROR("failed writing %s", childfile);
-	free(parentfile);
+	if (parentfile)
+		free(parentfile);
 	free(childfile);
-
 	return ok;
 }
 
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
index fbb3b87..a2e7cd3 100644
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -66,6 +66,7 @@ struct cgroup_mount_point {
 	char *mount_point;
 	char *mount_prefix;
 	bool read_only;
+	bool need_cpuset_init;
 };
 
 /*
-- 
1.8.5.3



More information about the lxc-devel mailing list