[lxc-devel] [lxc/master] cgroups: remove isolated cpus from cpuset.cpus …

brauner on Github lxc-bot at linuxcontainers.org
Sun Nov 6 20:11:07 UTC 2016


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1596 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20161106/b3c70974/attachment.bin>
-------------- next part --------------
From 0ce13893d343e3765e692fdb25179d76758c9562 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at canonical.com>
Date: Sun, 6 Nov 2016 19:48:58 +0100
Subject: [PATCH 1/2] utils: add lxc_append_string()

lxc_append_string() appends strings without separator. This is mostly useful
for reading in whole files line-by-line.

Signed-off-by: Christian Brauner <christian.brauner at canonical.com>
---
 src/lxc/utils.c | 34 ++++++++++++++++++++++++++++++++++
 src/lxc/utils.h |  2 ++
 2 files changed, 36 insertions(+)

diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index c912fe8..d6f8ecf 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1930,3 +1930,37 @@ bool task_blocking_signal(pid_t pid, int signal)
 	fclose(f);
 	return bret;
 }
+
+static int lxc_append_null_to_list(void ***list)
+{
+	int newentry = 0;
+	void **tmp;
+
+	if (*list)
+		for (; (*list)[newentry]; newentry++) {
+			;
+		}
+
+	tmp = realloc(*list, (newentry + 2) * sizeof(void **));
+	if (!tmp)
+		return -1;
+
+	*list = tmp;
+	(*list)[newentry + 1] = NULL;
+
+	return newentry;
+}
+
+int lxc_append_string(char ***list, char *entry)
+{
+	int newentry = lxc_append_null_to_list((void ***)list);
+	char *copy;
+
+	copy = strdup(entry);
+	if (!copy)
+		return -1;
+
+	(*list)[newentry] = copy;
+
+	return 0;
+}
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index a0fa0e2..50e1847 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -258,6 +258,8 @@ extern char *lxc_append_paths(const char *first, const char *second);
 extern bool lxc_string_in_list(const char *needle, const char *haystack, char sep);
 extern char **lxc_string_split(const char *string, char sep);
 extern char **lxc_string_split_and_trim(const char *string, char sep);
+/* Append string to NULL-terminated string array. */
+extern int lxc_append_string(char ***list, char *entry);
 
 /* some simple array manipulation utilities */
 typedef void (*lxc_free_fn)(void *);

From 684a0cf5a3a576a34d3e05e3faf18ee1baee8be6 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at canonical.com>
Date: Sun, 6 Nov 2016 19:50:54 +0100
Subject: [PATCH 2/2] cgroups: remove isolated cpus from cpuset.cpus

In case the system was booted with

    isolcpus=n_i-n_j,n_k,n_m

we cannot simply copy the cpuset.cpus file from our parent cgroup. For example,
in the root cgroup cpuset.cpus will contain all of the cpus including the
isolated cpus. Copying the values of the root cgroup into a child cgroup will
lead to a wrong view in /proc/self/status: For the root cgroup
/sys/fs/cgroup/cpuset /proc/self/status will correctly show

    Cpus_allowed_list:      0-1,3

even though cpuset.cpus will show

    0-3

However, initializing a subcgroup in the cpuset controller by copying the
cpuset.cpus setting from the root cgroup will cause /proc/self/status to
incorrectly show

    Cpus_allowed_list:      0-3

Hence, we need to make sure to remove the isolated cpus from cpuset.cpus. Seth
has argued that this is not a kernel bug but by design. So let us be the smart
guys and fix this in liblxc.

The solution is straightforward: To avoid having to work with raw cpulist
strings we create cpumasks based on uint32_t bit arrays.

Signed-off-by: Christian Brauner <christian.brauner at canonical.com>
---
 src/lxc/cgroups/cgfsng.c | 325 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 275 insertions(+), 50 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index ec94099..1e6cb98 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -33,21 +33,25 @@
  * under /sys/fs/cgroup/clist where clist is either the controller, or
  * a comman-separated list of controllers.
  */
+
 #include "config.h"
-#include <stdio.h>
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <grp.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <errno.h>
-#include <sys/types.h>
+#include <string.h>
 #include <unistd.h>
-#include <dirent.h>
-#include <grp.h>
+#include <sys/types.h>
 
 #include "bdev.h"
-#include "log.h"
 #include "cgroup.h"
-#include "utils.h"
 #include "commands.h"
+#include "log.h"
+#include "utils.h"
 
 lxc_log_define(lxc_cgfsng, lxc);
 
@@ -251,6 +255,264 @@ struct hierarchy *get_hierarchy(const char *c)
 
 static char *must_make_path(const char *first, ...) __attribute__((sentinel));
 
+#define BATCH_SIZE 50
+static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
+{
+	int newbatches = (newlen / BATCH_SIZE) + 1;
+	int oldbatches = (oldlen / BATCH_SIZE) + 1;
+
+	if (!*mem || newbatches > oldbatches) {
+		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
+	}
+}
+
+static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
+{
+	size_t full = oldlen + newlen;
+
+	batch_realloc(dest, oldlen, full + 1);
+
+	memcpy(*dest + oldlen, new, newlen + 1);
+}
+
+/* Slurp in a whole file */
+static char *read_file(char *fnam)
+{
+	FILE *f;
+	char *line = NULL, *buf = NULL;
+	size_t len = 0, fulllen = 0;
+	int linelen;
+
+	f = fopen(fnam, "r");
+	if (!f)
+		return NULL;
+	while ((linelen = getline(&line, &len, f)) != -1) {
+		append_line(&buf, fulllen, line, linelen);
+		fulllen += linelen;
+	}
+	fclose(f);
+	free(line);
+	return buf;
+}
+
+/* Taken over modified from the kernel sources. */
+#define NBITS 32 /* bits in uint32_t */
+#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
+
+static void set_bit(unsigned bit, uint32_t *bitarr)
+{
+	bitarr[bit / NBITS] |= (1 << (bit % NBITS));
+}
+
+static void clear_bit(unsigned bit, uint32_t *bitarr)
+{
+	bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
+}
+
+static bool is_set(unsigned bit, uint32_t *bitarr)
+{
+	return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
+}
+
+/* Create cpumask from cpulist aka turn:
+ *
+ *	0,2-3
+ *
+ *  into bit array
+ *
+ *	1 0 1 1
+ */
+static uint32_t *lxc_cpumask(char *buf, size_t nbits)
+{
+	char *token;
+	char *saveptr = NULL;
+	size_t arrlen = BITS_TO_LONGS(nbits);
+	uint32_t *bitarr = calloc(arrlen, sizeof(uint32_t));
+	if (!bitarr)
+		return NULL;
+
+	for (; (token = strtok_r(buf, ",", &saveptr)); buf = NULL) {
+		errno = 0;
+		unsigned start = strtoul(token, NULL, 0);
+		unsigned end = start;
+
+		char *range = strchr(token, '-');
+		if (range)
+			end = strtoul(range + 1, NULL, 0);
+		if (!(start <= end)) {
+			free(bitarr);
+			return NULL;
+		}
+
+		if (end >= nbits) {
+			free(bitarr);
+			return NULL;
+		}
+
+		while (start <= end)
+			set_bit(start++, bitarr);
+	}
+
+	return bitarr;
+}
+
+/* The largest integer that can fit into long int is 2^64. This is a
+ * 20-digit number. */
+#define LEN 21
+/* Turn cpumask into simple, comma-separated cpulist. */
+static char *lxc_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
+{
+	size_t i;
+	int ret;
+	char numstr[LEN] = {0};
+	char **cpulist = NULL;
+
+	for (i = 0; i <= nbits; i++) {
+		if (is_set(i, bitarr)) {
+			ret = snprintf(numstr, LEN, "%lu", i);
+			if (ret < 0 || (size_t)ret >= LEN) {
+				lxc_free_array((void **)cpulist, free);
+				return NULL;
+			}
+			if (lxc_append_string(&cpulist, numstr) < 0) {
+				lxc_free_array((void **)cpulist, free);
+				return NULL;
+			}
+		}
+	}
+	return lxc_string_join(",", (const char **)cpulist, false);
+}
+
+static ssize_t get_max_cpus(char *cpulist)
+{
+	char *c1, *c2;
+	char *maxcpus = cpulist;
+	size_t cpus = 0;
+
+	c1 = strrchr(maxcpus, ',');
+	if (c1)
+		c1++;
+
+	c2 = strrchr(maxcpus, '-');
+	if (c2)
+		c2++;
+
+	if (!c1 && !c2)
+		c1 = maxcpus;
+	else if (c1 > c2)
+		c2 = c1;
+	else if (c2 < c1)
+		c1 = c2;
+	else if (!c1 && c2) // The reverse case is obvs. not needed.
+		c1 = c2;
+
+	/* If the above logic is correct, c1 should always hold a valid string
+	 * here.
+	 */
+
+	errno = 0;
+	cpus = strtoul(c1, NULL, 0);
+	if (errno != 0)
+		return -1;
+
+	return cpus;
+}
+
+static bool set_cpuset(char *path, bool am_initialized)
+{
+	char *lastslash, *fpath, oldv;
+	int ret;
+	ssize_t i;
+
+	ssize_t maxposs = 0, maxisol = 0;
+	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
+	uint32_t *possmask = NULL, *isolmask = NULL;
+	bool bret = false;
+
+	lastslash = strrchr(path, '/');
+	if (!lastslash) { // bug...  this shouldn't be possible
+		ERROR("cgfsng:copy_parent_file: bad path %s", path);
+		return bret;
+	}
+	oldv = *lastslash;
+	*lastslash = '\0';
+	fpath = must_make_path(path, "cpuset.cpus", NULL);
+	posscpus = read_file(fpath);
+	if (!posscpus)
+		goto cleanup;
+
+	/* Get maximum number of cpus found in possible cpuset. */
+	maxposs = get_max_cpus(posscpus);
+	if (maxposs < 0)
+		goto cleanup;
+
+	isolcpus = read_file("/sys/devices/system/cpu/isolated");
+	if (!isolcpus)
+		goto cleanup;
+	if (!isdigit(isolcpus[0])) {
+		cpulist = posscpus;
+		/* No isolated cpus but we weren't already initialized by
+		 * someone. We should simply copy the parents cpuset.cpus
+		 * values.
+		 */
+		if (!am_initialized)
+			goto copy_parent;
+		/* No isolated cpus but we were already initialized by someone.
+		 * Nothing more to do for us.
+		 */
+		bret = true;
+		goto cleanup;
+	}
+
+	/* Get maximum number of cpus found in isolated cpuset. */
+	maxisol = get_max_cpus(isolcpus);
+	if (maxisol < 0)
+		goto cleanup;
+
+	if (maxposs < maxisol)
+		maxposs = maxisol;
+	maxposs++;
+
+	possmask = lxc_cpumask(posscpus, maxposs);
+	if (!possmask)
+		goto cleanup;
+
+	isolmask = lxc_cpumask(isolcpus, maxposs);
+	if (!isolmask)
+		goto cleanup;
+
+	for (i = 0; i <= maxposs; i++) {
+		if (is_set(i, isolmask) && is_set(i, possmask)) {
+			clear_bit(i, possmask);
+		}
+	}
+
+	cpulist = lxc_cpumask_to_cpulist(possmask, maxposs);
+	if (!cpulist) /* Bug */
+		goto cleanup;
+
+copy_parent:
+	*lastslash = oldv;
+	fpath = must_make_path(path, "cpuset.cpus", NULL);
+	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false);
+	if (!ret)
+		bret = true;
+
+cleanup:
+	free(fpath);
+
+	free(isolcpus);
+	free(isolmask);
+
+	if (posscpus != cpulist)
+		free(posscpus);
+	free(possmask);
+
+	free(cpulist);
+	return bret;
+}
+
 /* Copy contents of parent(@path)/@file to @path/@file */
 static bool copy_parent_file(char *path, char *file)
 {
@@ -295,7 +557,7 @@ static bool copy_parent_file(char *path, char *file)
  * Since the h->base_path is populated by init or ourselves, we know
  * it is already initialized.
  */
-bool handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
+static bool handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
 {
 	char *cgpath, *clonechildrenpath, v, *slash;
 
@@ -329,6 +591,10 @@ bool handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
 		return false;
 	}
 
+	/* Make sure any isolated cpus are removed from cpuset.cpus. */
+	if (!set_cpuset(cgpath, v == '1'))
+		return false;
+
 	if (v == '1') {  /* already set for us by someone else */
 		free(clonechildrenpath);
 		free(cgpath);
@@ -336,8 +602,7 @@ bool handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
 	}
 
 	/* copy parent's settings */
-	if (!copy_parent_file(cgpath, "cpuset.cpus") ||
-			!copy_parent_file(cgpath, "cpuset.mems")) {
+	if (!copy_parent_file(cgpath, "cpuset.mems")) {
 		free(cgpath);
 		free(clonechildrenpath);
 		return false;
@@ -605,46 +870,6 @@ static char *get_current_cgroup(char *basecginfo, char *controller)
 	}
 }
 
-#define BATCH_SIZE 50
-static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
-{
-	int newbatches = (newlen / BATCH_SIZE) + 1;
-	int oldbatches = (oldlen / BATCH_SIZE) + 1;
-
-	if (!*mem || newbatches > oldbatches) {
-		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
-	}
-}
-
-static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
-{
-	size_t full = oldlen + newlen;
-
-	batch_realloc(dest, oldlen, full + 1);
-
-	memcpy(*dest + oldlen, new, newlen + 1);
-}
-
-/* Slurp in a whole file */
-static char *read_file(char *fnam)
-{
-	FILE *f;
-	char *line = NULL, *buf = NULL;
-	size_t len = 0, fulllen = 0;
-	int linelen;
-
-	f = fopen(fnam, "r");
-	if (!f)
-		return NULL;
-	while ((linelen = getline(&line, &len, f)) != -1) {
-		append_line(&buf, fulllen, line, linelen);
-		fulllen += linelen;
-	}
-	fclose(f);
-	free(line);
-	return buf;
-}
-
 /*
  * Given a hierarchy @mountpoint and base @path, verify that we can create
  * directories underneath it.


More information about the lxc-devel mailing list