[lxc-devel] [lxcfs/master] CPU views based on quotas

aither64 on Github lxc-bot at linuxcontainers.org
Tue Oct 23 07:05:08 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1880 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20181023/f59f7669/attachment.bin>
-------------- next part --------------
From 67dedf76ca8b3f8792d5ddca2b4f25a33a2c17f3 Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Mon, 25 Jun 2018 08:54:15 +0200
Subject: [PATCH 1/7] cpuinfo: use cpu view based on cpu quotas

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/bindings.c b/bindings.c
index 70386fc..40ba094 100644
--- a/bindings.c
+++ b/bindings.c
@@ -3509,6 +3509,85 @@ static bool cpuline_in_cpuset(const char *line, const char *cpuset)
 	return cpu_in_cpuset(cpu, cpuset);
 }
 
+/*
+ * Read cgroup CPU quota parameters from `cpu.cfs_quota_us` or `cpu.cfs_period_us`,
+ * depending on `param`. Parameter value is returned through `value`.
+ */
+static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value)
+{
+	bool rv = false;
+	char file[11 + 6 + 1]; // cpu.cfs__us + quota/period + \0
+	char *str = NULL;
+
+	sprintf(file, "cpu.cfs_%s_us", param);
+
+	if (!cgfs_get_value("cpu", cg, file, &str))
+		goto err;
+
+	if (sscanf(str, "%ld", value) != 1)
+		goto err;
+
+	rv = true;
+
+err:
+	if (str)
+		free(str);
+	return rv;
+}
+
+/*
+ * Return the maximum number of visible CPUs based on CPU quotas.
+ * If there is no quota set, zero is returned.
+ */
+int max_cpu_count(const char *cg)
+{
+	int rv, nprocs;
+	int64_t cfs_quota, cfs_period;
+
+	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
+		return 0;
+
+	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
+		return 0;
+
+	if (cfs_quota <= 0 || cfs_period <= 0)
+		return 0;
+
+	rv = cfs_quota / cfs_period;
+
+	/* In case quota/period does not yield a whole number, add one CPU for
+	 * the remainder.
+	 */
+	if ((cfs_quota % cfs_period) > 0)
+		rv += 1;
+
+	nprocs = get_nprocs();
+
+	if (rv > nprocs)
+		rv = nprocs;
+
+	return rv;
+}
+
+/*
+ * Determine whether CPU views should be used or not.
+ */
+bool use_cpu_view(const char *cg)
+{
+	int cfd;
+	char *tmpc;
+
+	tmpc = find_mounted_controller("cpu", &cfd);
+	if (!tmpc)
+		return false;
+
+	tmpc = find_mounted_controller("cpuacct", &cfd);
+	if (!tmpc)
+		return false;
+
+	return true;
+}
+
 /*
  * check whether this is a '^processor" line in /proc/cpuinfo
  */
@@ -3531,7 +3610,8 @@ static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
 	char *line = NULL;
 	size_t linelen = 0, total_len = 0, rv = 0;
 	bool am_printing = false, firstline = true, is_s390x = false;
-	int curcpu = -1, cpu;
+	int curcpu = -1, cpu, max_cpus = 0;
+	bool use_view;
 	char *cache = d->buf;
 	size_t cache_size = d->buflen;
 	FILE *f = NULL;
@@ -3559,6 +3639,11 @@ static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
 	if (!cpuset)
 		goto err;
 
+	use_view = use_cpu_view(cg);
+
+	if (use_view)
+		max_cpus = max_cpu_count(cg);
+
 	f = fopen("/proc/cpuinfo", "r");
 	if (!f)
 		goto err;
@@ -3576,6 +3661,8 @@ static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
 		if (strncmp(line, "# processors:", 12) == 0)
 			continue;
 		if (is_processor_line(line)) {
+			if (use_view && max_cpus > 0 && (curcpu+1) == max_cpus)
+				break;
 			am_printing = cpuline_in_cpuset(line, cpuset);
 			if (am_printing) {
 				curcpu ++;
@@ -3597,6 +3684,8 @@ static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
 			continue;
 		} else if (is_s390x && sscanf(line, "processor %d:", &cpu) == 1) {
 			char *p;
+			if (use_view && max_cpus > 0 && (curcpu+1) == max_cpus)
+				break;
 			if (!cpu_in_cpuset(cpu, cpuset))
 				continue;
 			curcpu ++;

From ef7bc476ca8998ac636de98d026892c6b9fec4f0 Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Mon, 25 Jun 2018 08:55:23 +0200
Subject: [PATCH 2/7] calc_hash(): do not apply modulo LOAD_SIZE

This will allow `calc_hash()` to be used for other purposes than
the loadavg implementation.

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bindings.c b/bindings.c
index 40ba094..ba99a45 100644
--- a/bindings.c
+++ b/bindings.c
@@ -115,7 +115,7 @@ static int calc_hash(char *name)
 			hash ^= (x >> 24);
 		hash &= ~x;
 	}
-	return ((hash & 0x7fffffff) % LOAD_SIZE);
+	return (hash & 0x7fffffff);
 }
 
 struct load_node {
@@ -4841,7 +4841,7 @@ static int proc_loadavg_read(char *buf, size_t size, off_t offset,
 		return read_file("/proc/loadavg", buf, size, d);
 
 	prune_init_slice(cg);
-	hash = calc_hash(cg);
+	hash = calc_hash(cg) % LOAD_SIZE;
 	n = locate_node(cg, hash);
 
 	/* First time */

From 2f051508570a93691b480c294d9bf9d2ede536d1 Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Mon, 25 Jun 2018 09:05:30 +0200
Subject: [PATCH 3/7] stat: use cpu views

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 518 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 517 insertions(+), 1 deletion(-)

diff --git a/bindings.c b/bindings.c
index ba99a45..2f7cc83 100644
--- a/bindings.c
+++ b/bindings.c
@@ -83,6 +83,7 @@ struct file_info {
 struct cpuacct_usage {
 	uint64_t user;
 	uint64_t system;
+	uint64_t idle;
 };
 
 /* The function of hash table.*/
@@ -103,7 +104,7 @@ struct cpuacct_usage {
  */
 static int loadavg = 0;
 static volatile sig_atomic_t loadavg_stop = 0;
-static int calc_hash(char *name)
+static int calc_hash(const char *name)
 {
 	unsigned int hash = 0;
 	unsigned int x = 0;
@@ -285,6 +286,94 @@ static void load_free(void)
 		pthread_rwlock_destroy(&load_hash[i].rdlock);
 	}
 }
+
+/* Data for CPU view */
+struct cg_proc_stat {
+	char *cg;
+	struct cpuacct_usage *usage; // Real usage as read from the host's /proc/stat
+	struct cpuacct_usage *view; // Usage stats reported to the container
+	int cpu_count;
+	struct cg_proc_stat *next;
+};
+
+struct cg_proc_stat_head {
+	struct cg_proc_stat *next;
+};
+
+#define CPU_VIEW_HASH_SIZE 100
+static struct cg_proc_stat_head *proc_stat_history[CPU_VIEW_HASH_SIZE];
+
+static bool cpu_view_init_head(struct cg_proc_stat_head **head)
+{
+	*head = malloc(sizeof(struct cg_proc_stat_head));
+	if (!(*head)) {
+		lxcfs_error("%s\n", strerror(errno));
+		return false;
+	}
+
+	(*head)->next = NULL;
+	return true;
+}
+
+static bool init_cpu_view()
+{
+	int i;
+
+	for (i = 0; i < CPU_VIEW_HASH_SIZE; i++)
+		proc_stat_history[i] = NULL;
+
+	for (i = 0; i < CPU_VIEW_HASH_SIZE; i++) {
+		if (!cpu_view_init_head(&proc_stat_history[i]))
+			goto err;
+	}
+
+	return true;
+
+err:
+	for (i = 0; i < CPU_VIEW_HASH_SIZE; i++) {
+		if (proc_stat_history[i]) {
+			free(proc_stat_history[i]);
+			proc_stat_history[i] = NULL;
+		}
+	}
+
+	return false;
+}
+
+static void cpu_view_free_head(struct cg_proc_stat_head *head)
+{
+	struct cg_proc_stat *node, *tmp;
+
+	if (head->next) {
+		node = head->next;
+
+		for (;;) {
+			tmp = node;
+			node = node->next;
+
+			free(tmp->cg);
+			free(tmp->usage);
+			free(tmp->view);
+			free(tmp);
+
+			if (!node)
+				break;
+		}
+	}
+
+	free(head);
+}
+
+static void free_cpu_view()
+{
+	int i;
+
+	for (i = 0; i < CPU_VIEW_HASH_SIZE; i++) {
+		if (proc_stat_history[i])
+			cpu_view_free_head(proc_stat_history[i]);
+	}
+}
+
 /* Reserve buffer size to account for file size changes. */
 #define BUF_RESERVE_SIZE 512
 
@@ -3989,6 +4078,420 @@ static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage *
 	return rv;
 }
 
+static unsigned long diff_cpu_usage(struct cpuacct_usage *older, struct cpuacct_usage *newer, struct cpuacct_usage *diff, int cpu_count)
+{
+	int i;
+	unsigned long sum = 0;
+
+	for (i = 0; i < cpu_count; i++) {
+		/* When cpuset is changed on the fly, the CPUs might get reordered.
+		 * We could either reset all counters, or check that the substractions
+		 * below will return expected results.
+		 */
+		if (newer[i].user > older[i].user)
+			diff[i].user = newer[i].user - older[i].user;
+		else
+			diff[i].user = 0;
+
+		if (newer[i].system > older[i].system)
+			diff[i].system = newer[i].system - older[i].system;
+		else
+			diff[i].system = 0;
+
+		if (newer[i].idle > older[i].idle)
+			diff[i].idle = newer[i].idle - older[i].idle;
+		else
+			diff[i].idle = 0;
+
+		sum += diff[i].user;
+		sum += diff[i].system;
+		sum += diff[i].idle;
+	}
+
+	return sum;
+}
+
+static void add_cpu_usage(unsigned long *surplus, struct cpuacct_usage *usage, unsigned long *counter, unsigned long threshold)
+{
+	unsigned long free_space, to_add;
+
+	free_space = threshold - usage->user - usage->system;
+
+	if (free_space > usage->idle)
+		free_space = usage->idle;
+
+	to_add = free_space > *surplus ? *surplus : free_space;
+
+	*counter += to_add;
+	usage->idle -= to_add;
+	*surplus -= to_add;
+}
+
+static struct cg_proc_stat *find_proc_stat_node(const char *cg)
+{
+	int hash = calc_hash(cg) % CPU_VIEW_HASH_SIZE;
+	struct cg_proc_stat_head *head = proc_stat_history[hash];
+	struct cg_proc_stat *node;
+
+	if (!head->next)
+		return NULL;
+
+	node = head->next;
+
+	do {
+		if (strcmp(cg, node->cg) == 0)
+			return node;
+	} while ((node = node->next));
+
+	return NULL;
+}
+
+static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
+{
+	struct cg_proc_stat *node;
+	int i;
+
+	node = malloc(sizeof(struct cg_proc_stat));
+	if (!node)
+		goto err;
+
+	node->cg = malloc(strlen(cg) + 1);
+	if (!node->cg)
+		goto err;
+
+	strcpy(node->cg, cg);
+
+	node->usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
+	if (!node->usage)
+		goto err;
+
+	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);
+
+	node->view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
+	if (!node->view)
+		goto err;
+
+	node->cpu_count = cpu_count;
+	node->next = NULL;
+
+	for (i = 0; i < cpu_count; i++) {
+		node->view[i].user = 0;
+		node->view[i].system = 0;
+		node->view[i].idle = 0;
+	}
+
+	return node;
+
+err:
+	if (node && node->cg)
+		free(node->cg);
+	if (node && node->usage)
+		free(node->usage);
+	if (node && node->view)
+		free(node->view);
+	if (node)
+		free(node);
+
+	return NULL;
+}
+
+static void add_proc_stat_node(struct cg_proc_stat *new_node)
+{
+	int hash = calc_hash(new_node->cg) % CPU_VIEW_HASH_SIZE;
+	struct cg_proc_stat_head *head = proc_stat_history[hash];
+	struct cg_proc_stat *node;
+
+	if (!head->next) {
+		head->next = new_node;
+		return;
+	}
+
+	for (;;) {
+		node = head->next;
+
+		if (node->next) {
+			node = node->next;
+			continue;
+		}
+
+		node->next = new_node;
+		return;
+	}
+}
+
+static void reset_proc_stat_node(struct cg_proc_stat *node, struct cpuacct_usage *usage, int cpu_count)
+{
+	int i;
+
+	lxcfs_debug("Resetting stat node for %s\n", node->cg);
+	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);
+
+	for (i = 0; i < cpu_count; i++) {
+		node->view[i].user = 0;
+		node->view[i].system = 0;
+		node->view[i].idle = 0;
+	}
+
+	node->cpu_count = cpu_count;
+}
+
+static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct_usage *cg_cpu_usage, FILE *f, char *buf, size_t buf_size)
+{
+	char *line = NULL;
+	size_t linelen = 0, total_len = 0, rv = 0, l;
+	int curcpu = -1; /* cpu numbering starts at 0 */
+	int max_cpus = max_cpu_count(cg), cpu_cnt = 0;
+	unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0;
+	unsigned long user_sum = 0, system_sum = 0, idle_sum = 0;
+	unsigned long user_surplus = 0, system_surplus = 0;
+	unsigned long total_sum, threshold;
+	struct cg_proc_stat *stat_node;
+	struct cpuacct_usage *diff = NULL;
+	int nprocs = get_nprocs();
+
+	/* Read all CPU stats and stop when we've encountered other lines */
+	while (getline(&line, &linelen, f) != -1) {
+		int cpu, ret;
+		char cpu_char[10]; /* That's a lot of cores */
+		uint64_t all_used, cg_used;
+
+		if (strlen(line) == 0)
+			continue;
+		if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
+			/* not a ^cpuN line containing a number N */
+			break;
+		}
+
+		if (sscanf(cpu_char, "%d", &cpu) != 1)
+			continue;
+		if (!cpu_in_cpuset(cpu, cpuset))
+			continue;
+		curcpu ++;
+		cpu_cnt ++;
+
+		ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+			   &user,
+			   &nice,
+			   &system,
+			   &idle,
+			   &iowait,
+			   &irq,
+			   &softirq,
+			   &steal,
+			   &guest,
+			   &guest_nice);
+
+		if (ret != 10)
+			continue;
+
+		all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
+		cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;
+
+		if (all_used >= cg_used) {
+			cg_cpu_usage[curcpu].idle = idle + (all_used - cg_used);
+
+		} else {
+			lxcfs_error("cpu%d from %s has unexpected cpu time: %lu in /proc/stat, "
+					"%lu in cpuacct.usage_all; unable to determine idle time\n",
+					curcpu, cg, all_used, cg_used);
+			cg_cpu_usage[curcpu].idle = idle;
+		}
+	}
+
+	/* Cannot use more CPUs than is available due to cpuset */
+	if (max_cpus > cpu_cnt)
+		max_cpus = cpu_cnt;
+
+	stat_node = find_proc_stat_node(cg);
+
+	if (!stat_node) {
+		stat_node = new_proc_stat_node(cg_cpu_usage, nprocs, cg);
+		if (!stat_node) {
+			rv = 0;
+			goto err;
+		}
+
+		add_proc_stat_node(stat_node);
+	}
+
+	diff = malloc(sizeof(struct cpuacct_usage) * nprocs);
+	if (!diff) {
+		rv = 0;
+		goto err;
+	}
+
+	/*
+	 * If the new values are LOWER than values stored in memory, it means
+	 * the cgroup has been reset/recreated and we should reset too.
+	 */
+	if (cg_cpu_usage[0].user < stat_node->usage[0].user)
+		reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);
+
+	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, cpu_cnt);
+
+	for (curcpu = 0; curcpu < cpu_cnt; curcpu++) {
+		stat_node->usage[curcpu].user += diff[curcpu].user;
+		stat_node->usage[curcpu].system += diff[curcpu].system;
+		stat_node->usage[curcpu].idle += diff[curcpu].idle;
+
+		if (max_cpus > 0 && curcpu >= max_cpus) {
+			user_surplus += diff[curcpu].user;
+			system_surplus += diff[curcpu].system;
+		}
+	}
+
+	/* Calculate usage counters of visible CPUs */
+	if (max_cpus > 0) {
+		/* threshold = maximum usage per cpu, including idle */
+		threshold = total_sum / cpu_cnt * max_cpus;
+
+		for (curcpu = 0; curcpu < max_cpus; curcpu++) {
+			if (diff[curcpu].user + diff[curcpu].system >= threshold)
+				continue;
+
+			/* Add user */
+			add_cpu_usage(
+					&user_surplus,
+					&diff[curcpu],
+					&diff[curcpu].user,
+					threshold);
+
+			if (diff[curcpu].user + diff[curcpu].system >= threshold)
+				continue;
+
+			/* If there is still room, add system */
+			add_cpu_usage(
+					&system_surplus,
+					&diff[curcpu],
+					&diff[curcpu].system,
+					threshold);
+		}
+
+		if (user_surplus > 0)
+			lxcfs_debug("leftover user: %lu for %s\n", user_surplus, cg);
+		if (system_surplus > 0)
+			lxcfs_debug("leftover system: %lu for %s\n", system_surplus, cg);
+
+		for (curcpu = 0; curcpu < max_cpus; curcpu++) {
+			stat_node->view[curcpu].user += diff[curcpu].user;
+			stat_node->view[curcpu].system += diff[curcpu].system;
+			stat_node->view[curcpu].idle += diff[curcpu].idle;
+
+			user_sum += stat_node->view[curcpu].user;
+			system_sum += stat_node->view[curcpu].system;
+			idle_sum += stat_node->view[curcpu].idle;
+		}
+
+	} else {
+		for (curcpu = 0; curcpu < cpu_cnt; curcpu++) {
+			stat_node->view[curcpu].user = stat_node->usage[curcpu].user;
+			stat_node->view[curcpu].system = stat_node->usage[curcpu].system;
+			stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle;
+
+			user_sum += stat_node->view[curcpu].user;
+			system_sum += stat_node->view[curcpu].system;
+			idle_sum += stat_node->view[curcpu].idle;
+		}
+	}
+
+	/* Render the file */
+	/* cpu-all */
+	l = snprintf(buf, buf_size, "cpu  %lu 0 %lu %lu 0 0 0 0 0 0\n",
+			user_sum,
+			system_sum,
+			idle_sum);
+
+	if (l < 0) {
+		perror("Error writing to cache");
+		rv = 0;
+		goto err;
+
+	}
+	if (l >= buf_size) {
+		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
+		rv = 0;
+		goto err;
+	}
+
+	buf += l;
+	buf_size -= l;
+	total_len += l;
+
+	/* Render visible CPUs */
+	for (curcpu = 0; curcpu < cpu_cnt; curcpu++) {
+		if (max_cpus > 0 && curcpu == max_cpus)
+			break;
+
+		l = snprintf(buf, buf_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n",
+				curcpu,
+				stat_node->view[curcpu].user,
+				stat_node->view[curcpu].system,
+				stat_node->view[curcpu].idle);
+
+		if (l < 0) {
+			perror("Error writing to cache");
+			rv = 0;
+			goto err;
+
+		}
+		if (l >= buf_size) {
+			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
+			rv = 0;
+			goto err;
+		}
+
+		buf += l;
+		buf_size -= l;
+		total_len += l;
+	}
+
+	/* Pass the rest of /proc/stat, start with the last line read */
+	l = snprintf(buf, buf_size, "%s", line);
+
+	if (l < 0) {
+		perror("Error writing to cache");
+		rv = 0;
+		goto err;
+
+	}
+	if (l >= buf_size) {
+		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
+		rv = 0;
+		goto err;
+	}
+
+	buf += l;
+	buf_size -= l;
+	total_len += l;
+
+	/* Pass the rest of the host's /proc/stat */
+	while (getline(&line, &linelen, f) != -1) {
+		l = snprintf(buf, buf_size, "%s", line);
+		if (l < 0) {
+			perror("Error writing to cache");
+			rv = 0;
+			goto err;
+		}
+		if (l >= buf_size) {
+			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
+			rv = 0;
+			goto err;
+		}
+		buf += l;
+		buf_size -= l;
+		total_len += l;
+	}
+
+	rv = total_len;
+
+err:
+	if (line)
+		free(line);
+	if (diff)
+		free(diff);
+	return rv;
+}
+
 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
 static int proc_stat_read(char *buf, size_t size, off_t offset,
 		struct fuse_file_info *fi)
@@ -4053,6 +4556,11 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 		goto err;
 	}
 
+	if (use_cpu_view(cg) && cg_cpu_usage) {
+		total_len = cpu_view_proc_stat(cg, cpuset, cg_cpu_usage, f, d->buf, d->buflen);
+		goto out;
+	}
+
 	while (getline(&line, &linelen, f) != -1) {
 		ssize_t l;
 		int cpu;
@@ -4201,6 +4709,8 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 
 	memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
 	total_len += cpuall_len;
+
+out:
 	d->cached = 1;
 	d->size = total_len;
 	if (total_len > size)
@@ -5501,6 +6011,11 @@ static void __attribute__((constructor)) collect_and_mount_subsystems(void)
 	if (!cret || chdir(cwd) < 0)
 		lxcfs_debug("Could not change back to original working directory: %s.\n", strerror(errno));
 
+	if (!init_cpu_view()) {
+		lxcfs_error("%s\n", "failed to init CPU view");
+		goto out;
+	}
+
 	print_subsystems();
 
 out:
@@ -5524,6 +6039,7 @@ static void __attribute__((destructor)) free_subsystems(void)
 	}
 	free(hierarchies);
 	free(fd_hierarchies);
+	free_cpu_view();
 
 	if (cgroup_mount_ns_fd >= 0)
 		close(cgroup_mount_ns_fd);

From c668b4757201195a8b5f8b24014e58be3c587024 Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Mon, 25 Jun 2018 14:47:43 +0200
Subject: [PATCH 4/7] CPU view: prune stale entries

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 7 deletions(-)

diff --git a/bindings.c b/bindings.c
index 2f7cc83..2c356c0 100644
--- a/bindings.c
+++ b/bindings.c
@@ -298,6 +298,7 @@ struct cg_proc_stat {
 
 struct cg_proc_stat_head {
 	struct cg_proc_stat *next;
+	time_t lastcheck;
 };
 
 #define CPU_VIEW_HASH_SIZE 100
@@ -311,6 +312,7 @@ static bool cpu_view_init_head(struct cg_proc_stat_head **head)
 		return false;
 	}
 
+	(*head)->lastcheck = time(NULL);
 	(*head)->next = NULL;
 	return true;
 }
@@ -340,6 +342,14 @@ static bool init_cpu_view()
 	return false;
 }
 
+static void free_proc_stat_node(struct cg_proc_stat *node)
+{
+	free(node->cg);
+	free(node->usage);
+	free(node->view);
+	free(node);
+}
+
 static void cpu_view_free_head(struct cg_proc_stat_head *head)
 {
 	struct cg_proc_stat *node, *tmp;
@@ -350,11 +360,7 @@ static void cpu_view_free_head(struct cg_proc_stat_head *head)
 		for (;;) {
 			tmp = node;
 			node = node->next;
-
-			free(tmp->cg);
-			free(tmp->usage);
-			free(tmp->view);
-			free(tmp);
+			free_proc_stat_node(tmp);
 
 			if (!node)
 				break;
@@ -1182,6 +1188,28 @@ bool cgfs_get_value(const char *controller, const char *cgroup, const char *file
 	return *value != NULL;
 }
 
+bool cgfs_param_exist(const char *controller, const char *cgroup, const char *file)
+{
+	int ret, cfd;
+	size_t len;
+	char *fnam, *tmpc;
+
+	tmpc = find_mounted_controller(controller, &cfd);
+	if (!tmpc)
+		return false;
+
+	/* Make sure we pass a relative path to *at() family of functions.
+	 * . + /cgroup + / + file + \0
+	 */
+	len = strlen(cgroup) + strlen(file) + 3;
+	fnam = alloca(len);
+	ret = snprintf(fnam, len, "%s%s/%s", *cgroup == '/' ? "." : "", cgroup, file);
+	if (ret < 0 || (size_t)ret >= len)
+		return false;
+
+	return (faccessat(cfd, fnam, F_OK, 0) == 0);
+}
+
 struct cgfs_files *cgfs_get_key(const char *controller, const char *cgroup, const char *file)
 {
 	int ret, cfd;
@@ -4127,6 +4155,51 @@ static void add_cpu_usage(unsigned long *surplus, struct cpuacct_usage *usage, u
 	*surplus -= to_add;
 }
 
+static struct cg_proc_stat *prune_proc_stat_list(struct cg_proc_stat *node)
+{
+	struct cg_proc_stat *first = NULL, *prev, *tmp;
+
+	for (prev = NULL; node; ) {
+		if (!cgfs_param_exist("cpu", node->cg, "cpu.shares")) {
+			tmp = node;
+			lxcfs_debug("Removing stat node for %s\n", node->cg);
+
+			if (prev)
+				prev->next = node->next;
+			else
+				first = node->next;
+
+			node = node->next;
+			free_proc_stat_node(tmp);
+		} else {
+			if (!first)
+				first = node;
+			prev = node;
+			node = node->next;
+		}
+	}
+
+	return first;
+}
+
+#define PROC_STAT_PRUNE_INTERVAL 10
+static void prune_proc_stat_history(void)
+{
+	int i;
+	time_t now = time(NULL);
+
+	for (i = 0; i < CPU_VIEW_HASH_SIZE; i++) {
+		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now)
+			return;
+
+		if (!proc_stat_history[i]->next)
+			continue;
+
+		proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
+		proc_stat_history[i]->lastcheck = now;
+	}
+}
+
 static struct cg_proc_stat *find_proc_stat_node(const char *cg)
 {
 	int hash = calc_hash(cg) % CPU_VIEW_HASH_SIZE;
@@ -4140,10 +4213,14 @@ static struct cg_proc_stat *find_proc_stat_node(const char *cg)
 
 	do {
 		if (strcmp(cg, node->cg) == 0)
-			return node;
+			goto out;
 	} while ((node = node->next));
 
-	return NULL;
+	node = NULL;
+
+out:
+	prune_proc_stat_history();
+	return node;
 }
 
 static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)

From 06b2965683e900d38792744e37f3ab54a17547da Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Mon, 25 Jun 2018 18:24:14 +0200
Subject: [PATCH 5/7] CPU view: add mutexes

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 104 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 82 insertions(+), 22 deletions(-)

diff --git a/bindings.c b/bindings.c
index 2c356c0..45244bb 100644
--- a/bindings.c
+++ b/bindings.c
@@ -293,12 +293,18 @@ struct cg_proc_stat {
 	struct cpuacct_usage *usage; // Real usage as read from the host's /proc/stat
 	struct cpuacct_usage *view; // Usage stats reported to the container
 	int cpu_count;
+	pthread_mutex_t lock; // For node manipulation
 	struct cg_proc_stat *next;
 };
 
 struct cg_proc_stat_head {
 	struct cg_proc_stat *next;
 	time_t lastcheck;
+
+	/*
+	 * For access to the list. Reading can be parallel, pruning is exclusive.
+	 */
+	pthread_rwlock_t lock;
 };
 
 #define CPU_VIEW_HASH_SIZE 100
@@ -314,6 +320,13 @@ static bool cpu_view_init_head(struct cg_proc_stat_head **head)
 
 	(*head)->lastcheck = time(NULL);
 	(*head)->next = NULL;
+
+	if (pthread_rwlock_init(&(*head)->lock, NULL) != 0) {
+		lxcfs_error("%s\n", "Failed to initialize list lock");
+		free(*head);
+		return false;
+	}
+
 	return true;
 }
 
@@ -344,6 +357,7 @@ static bool init_cpu_view()
 
 static void free_proc_stat_node(struct cg_proc_stat *node)
 {
+	pthread_mutex_destroy(&node->lock);
 	free(node->cg);
 	free(node->usage);
 	free(node->view);
@@ -367,6 +381,7 @@ static void cpu_view_free_head(struct cg_proc_stat_head *head)
 		}
 	}
 
+	pthread_rwlock_destroy(&head->lock);
 	free(head);
 }
 
@@ -4189,25 +4204,32 @@ static void prune_proc_stat_history(void)
 	time_t now = time(NULL);
 
 	for (i = 0; i < CPU_VIEW_HASH_SIZE; i++) {
-		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now)
+		pthread_rwlock_wrlock(&proc_stat_history[i]->lock);
+
+		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now) {
+			pthread_rwlock_unlock(&proc_stat_history[i]->lock);
 			return;
+		}
 
-		if (!proc_stat_history[i]->next)
-			continue;
+		if (proc_stat_history[i]->next) {
+			proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
+			proc_stat_history[i]->lastcheck = now;
+		}
 
-		proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
-		proc_stat_history[i]->lastcheck = now;
+		pthread_rwlock_unlock(&proc_stat_history[i]->lock);
 	}
 }
 
-static struct cg_proc_stat *find_proc_stat_node(const char *cg)
+static struct cg_proc_stat *find_proc_stat_node(struct cg_proc_stat_head *head, const char *cg)
 {
-	int hash = calc_hash(cg) % CPU_VIEW_HASH_SIZE;
-	struct cg_proc_stat_head *head = proc_stat_history[hash];
 	struct cg_proc_stat *node;
 
-	if (!head->next)
+	pthread_rwlock_rdlock(&head->lock);
+
+	if (!head->next) {
+		pthread_rwlock_unlock(&head->lock);
 		return NULL;
+	}
 
 	node = head->next;
 
@@ -4219,6 +4241,7 @@ static struct cg_proc_stat *find_proc_stat_node(const char *cg)
 	node = NULL;
 
 out:
+	pthread_rwlock_unlock(&head->lock);
 	prune_proc_stat_history();
 	return node;
 }
@@ -4251,6 +4274,11 @@ static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int
 	node->cpu_count = cpu_count;
 	node->next = NULL;
 
+	if (pthread_mutex_init(&node->lock, NULL) != 0) {
+		lxcfs_error("%s\n", "Failed to initialize node lock");
+		goto err;
+	}
+
 	for (i = 0; i < cpu_count; i++) {
 		node->view[i].user = 0;
 		node->view[i].system = 0;
@@ -4272,19 +4300,28 @@ static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int
 	return NULL;
 }
 
-static void add_proc_stat_node(struct cg_proc_stat *new_node)
+static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node)
 {
 	int hash = calc_hash(new_node->cg) % CPU_VIEW_HASH_SIZE;
 	struct cg_proc_stat_head *head = proc_stat_history[hash];
-	struct cg_proc_stat *node;
+	struct cg_proc_stat *node, *rv = new_node;
+
+	pthread_rwlock_wrlock(&head->lock);
 
 	if (!head->next) {
 		head->next = new_node;
-		return;
+		goto out;
 	}
 
+	node = head->next;
+
 	for (;;) {
-		node = head->next;
+		if (strcmp(node->cg, new_node->cg) == 0) {
+			/* The node is already present, return it */
+			free_proc_stat_node(new_node);
+			rv = node;
+			goto out;
+		}
 
 		if (node->next) {
 			node = node->next;
@@ -4292,8 +4329,33 @@ static void add_proc_stat_node(struct cg_proc_stat *new_node)
 		}
 
 		node->next = new_node;
-		return;
+		goto out;
+	}
+
+out:
+	pthread_rwlock_unlock(&head->lock);
+	return rv;
+}
+
+static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
+{
+	int hash = calc_hash(cg) % CPU_VIEW_HASH_SIZE;
+	struct cg_proc_stat_head *head = proc_stat_history[hash];
+	struct cg_proc_stat *node;
+
+	node = find_proc_stat_node(head, cg);
+
+	if (!node) {
+		node = new_proc_stat_node(usage, cpu_count, cg);
+		if (!node)
+			return NULL;
+
+		node = add_proc_stat_node(node);
+		lxcfs_debug("New stat node (%d) for %s\n", cpu_count, cg);
 	}
+
+	pthread_mutex_lock(&node->lock);
+	return node;
 }
 
 static void reset_proc_stat_node(struct cg_proc_stat *node, struct cpuacct_usage *usage, int cpu_count)
@@ -4379,16 +4441,12 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 	if (max_cpus > cpu_cnt)
 		max_cpus = cpu_cnt;
 
-	stat_node = find_proc_stat_node(cg);
+	stat_node = find_or_create_proc_stat_node(cg_cpu_usage, nprocs, cg);
 
 	if (!stat_node) {
-		stat_node = new_proc_stat_node(cg_cpu_usage, nprocs, cg);
-		if (!stat_node) {
-			rv = 0;
-			goto err;
-		}
-
-		add_proc_stat_node(stat_node);
+		lxcfs_error("unable to find/create stat node for %s\n", cg);
+		rv = 0;
+		goto err;
 	}
 
 	diff = malloc(sizeof(struct cpuacct_usage) * nprocs);
@@ -4562,6 +4620,8 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 	rv = total_len;
 
 err:
+	if (stat_node)
+		pthread_mutex_unlock(&stat_node->lock);
 	if (line)
 		free(line);
 	if (diff)

From a1f3b053b38819bfb826ed055822f53191f5b72a Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Sun, 21 Oct 2018 09:04:12 +0200
Subject: [PATCH 6/7] CPU view: handle CPU hotplug at runtime

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/bindings.c b/bindings.c
index 45244bb..150ff66 100644
--- a/bindings.c
+++ b/bindings.c
@@ -4337,6 +4337,53 @@ static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node)
 	return rv;
 }
 
+static bool expand_proc_stat_node(struct cg_proc_stat *node, int cpu_count)
+{
+	struct cpuacct_usage *new_usage, *new_view;
+	int i;
+
+	/* Allocate new memory */
+	new_usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
+	if (!new_usage)
+		return false;
+
+	new_view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
+	if (!new_view) {
+		free(new_usage);
+		return false;
+	}
+
+	/* Copy existing data & initialize new elements */
+	for (i = 0; i < cpu_count; i++) {
+		if (i < node->cpu_count) {
+			new_usage[i].user = node->usage[i].user;
+			new_usage[i].system = node->usage[i].system;
+			new_usage[i].idle = node->usage[i].idle;
+
+			new_view[i].user = node->view[i].user;
+			new_view[i].system = node->view[i].system;
+			new_view[i].idle = node->view[i].idle;
+		} else {
+			new_usage[i].user = 0;
+			new_usage[i].system = 0;
+			new_usage[i].idle = 0;
+
+			new_view[i].user = 0;
+			new_view[i].system = 0;
+			new_view[i].idle = 0;
+		}
+	}
+
+	free(node->usage);
+	free(node->view);
+
+	node->usage = new_usage;
+	node->view = new_view;
+	node->cpu_count = cpu_count;
+
+	return true;
+}
+
 static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
 {
 	int hash = calc_hash(cg) % CPU_VIEW_HASH_SIZE;
@@ -4355,6 +4402,21 @@ static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *
 	}
 
 	pthread_mutex_lock(&node->lock);
+
+	/* If additional CPUs on the host have been enabled, CPU usage counter
+	 * arrays have to be expanded */
+	if (node->cpu_count < cpu_count) {
+		lxcfs_debug("Expanding stat node %d->%d for %s\n",
+				node->cpu_count, cpu_count, cg);
+
+		if (!expand_proc_stat_node(node, cpu_count)) {
+			pthread_mutex_unlock(&node->lock);
+			lxcfs_debug("Unable to expand stat node %d->%d for %s\n",
+					node->cpu_count, cpu_count, cg);
+			return NULL;
+		}
+	}
+
 	return node;
 }
 

From 73d4bf063ed3ffed526e60ba6eceec1541c31c25 Mon Sep 17 00:00:00 2001
From: Jakub Skokan <jakub.skokan at havefun.cz>
Date: Sun, 21 Oct 2018 15:13:07 +0200
Subject: [PATCH 7/7] CPU view: handle disabling/enabling of physical CPUs at
 runtime

Signed-off-by: Jakub Skokan <jakub.skokan at havefun.cz>
---
 bindings.c | 110 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 83 insertions(+), 27 deletions(-)

diff --git a/bindings.c b/bindings.c
index 150ff66..1a5e027 100644
--- a/bindings.c
+++ b/bindings.c
@@ -84,6 +84,7 @@ struct cpuacct_usage {
 	uint64_t user;
 	uint64_t system;
 	uint64_t idle;
+	bool online;
 };
 
 /* The function of hash table.*/
@@ -4045,7 +4046,7 @@ static uint64_t get_reaper_age(pid_t pid)
  */
 static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage **return_usage)
 {
-	int cpucount = get_nprocs();
+	int cpucount = get_nprocs_conf();
 	struct cpuacct_usage *cpu_usage;
 	int rv = 0, i, j, ret, read_pos = 0, read_cnt;
 	int cg_cpu;
@@ -4097,9 +4098,6 @@ static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage *
 
 		read_pos += read_cnt;
 
-		if (!cpu_in_cpuset(i, cpuset))
-			continue;
-
 		/* Convert the time from nanoseconds to USER_HZ */
 		cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec;
 		cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec;
@@ -4127,6 +4125,9 @@ static unsigned long diff_cpu_usage(struct cpuacct_usage *older, struct cpuacct_
 	unsigned long sum = 0;
 
 	for (i = 0; i < cpu_count; i++) {
+		if (!newer[i].online)
+			continue;
+
 		/* When cpuset is changed on the fly, the CPUs might get reordered.
 		 * We could either reset all counters, or check that the substractions
 		 * below will return expected results.
@@ -4441,6 +4442,7 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 	char *line = NULL;
 	size_t linelen = 0, total_len = 0, rv = 0, l;
 	int curcpu = -1; /* cpu numbering starts at 0 */
+	int physcpu, i;
 	int max_cpus = max_cpu_count(cg), cpu_cnt = 0;
 	unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0;
 	unsigned long user_sum = 0, system_sum = 0, idle_sum = 0;
@@ -4448,11 +4450,11 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 	unsigned long total_sum, threshold;
 	struct cg_proc_stat *stat_node;
 	struct cpuacct_usage *diff = NULL;
-	int nprocs = get_nprocs();
+	int nprocs = get_nprocs_conf();
 
 	/* Read all CPU stats and stop when we've encountered other lines */
 	while (getline(&line, &linelen, f) != -1) {
-		int cpu, ret;
+		int ret;
 		char cpu_char[10]; /* That's a lot of cores */
 		uint64_t all_used, cg_used;
 
@@ -4463,13 +4465,29 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 			break;
 		}
 
-		if (sscanf(cpu_char, "%d", &cpu) != 1)
-			continue;
-		if (!cpu_in_cpuset(cpu, cpuset))
+		if (sscanf(cpu_char, "%d", &physcpu) != 1)
 			continue;
+
 		curcpu ++;
 		cpu_cnt ++;
 
+		if (!cpu_in_cpuset(physcpu, cpuset)) {
+			for (i = curcpu; i <= physcpu; i++) {
+				cg_cpu_usage[i].online = false;
+			}
+			continue;
+		}
+
+		if (curcpu < physcpu) {
+			/* Some CPUs may be disabled */
+			for (i = curcpu; i < physcpu; i++)
+				cg_cpu_usage[i].online = false;
+
+			curcpu = physcpu;
+		}
+
+		cg_cpu_usage[curcpu].online = true;
+
 		ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
 			   &user,
 			   &nice,
@@ -4521,17 +4539,31 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 	 * If the new values are LOWER than values stored in memory, it means
 	 * the cgroup has been reset/recreated and we should reset too.
 	 */
-	if (cg_cpu_usage[0].user < stat_node->usage[0].user)
-		reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);
+	for (curcpu = 0; curcpu < nprocs; curcpu++) {
+		if (!cg_cpu_usage[curcpu].online)
+			continue;
+
+		if (cg_cpu_usage[curcpu].user < stat_node->usage[curcpu].user)
+			reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);
+
+		break;
+	}
 
-	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, cpu_cnt);
+	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, nprocs);
+
+	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
+		stat_node->usage[curcpu].online = cg_cpu_usage[curcpu].online;
+
+		if (!stat_node->usage[curcpu].online)
+			continue;
+
+		i++;
 
-	for (curcpu = 0; curcpu < cpu_cnt; curcpu++) {
 		stat_node->usage[curcpu].user += diff[curcpu].user;
 		stat_node->usage[curcpu].system += diff[curcpu].system;
 		stat_node->usage[curcpu].idle += diff[curcpu].idle;
 
-		if (max_cpus > 0 && curcpu >= max_cpus) {
+		if (max_cpus > 0 && i >= max_cpus) {
 			user_surplus += diff[curcpu].user;
 			system_surplus += diff[curcpu].system;
 		}
@@ -4542,7 +4574,15 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 		/* threshold = maximum usage per cpu, including idle */
 		threshold = total_sum / cpu_cnt * max_cpus;
 
-		for (curcpu = 0; curcpu < max_cpus; curcpu++) {
+		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
+			if (i == max_cpus)
+				break;
+
+			if (!stat_node->usage[curcpu].online)
+				continue;
+
+			i++;
+
 			if (diff[curcpu].user + diff[curcpu].system >= threshold)
 				continue;
 
@@ -4569,7 +4609,15 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 		if (system_surplus > 0)
 			lxcfs_debug("leftover system: %lu for %s\n", system_surplus, cg);
 
-		for (curcpu = 0; curcpu < max_cpus; curcpu++) {
+		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
+			if (i == max_cpus)
+				break;
+
+			if (!stat_node->usage[curcpu].online)
+				continue;
+
+			i++;
+
 			stat_node->view[curcpu].user += diff[curcpu].user;
 			stat_node->view[curcpu].system += diff[curcpu].system;
 			stat_node->view[curcpu].idle += diff[curcpu].idle;
@@ -4580,7 +4628,10 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 		}
 
 	} else {
-		for (curcpu = 0; curcpu < cpu_cnt; curcpu++) {
+		for (curcpu = 0; curcpu < nprocs; curcpu++) {
+			if (!stat_node->usage[curcpu].online)
+				continue;
+
 			stat_node->view[curcpu].user = stat_node->usage[curcpu].user;
 			stat_node->view[curcpu].system = stat_node->usage[curcpu].system;
 			stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle;
@@ -4615,12 +4666,17 @@ static int cpu_view_proc_stat(const char *cg, const char *cpuset, struct cpuacct
 	total_len += l;
 
 	/* Render visible CPUs */
-	for (curcpu = 0; curcpu < cpu_cnt; curcpu++) {
-		if (max_cpus > 0 && curcpu == max_cpus)
+	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
+		if (!stat_node->usage[curcpu].online)
+			continue;
+
+		i++;
+
+		if (max_cpus > 0 && i == max_cpus)
 			break;
 
 		l = snprintf(buf, buf_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n",
-				curcpu,
+				i,
 				stat_node->view[curcpu].user,
 				stat_node->view[curcpu].system,
 				stat_node->view[curcpu].idle);
@@ -4702,6 +4758,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 	char *line = NULL;
 	size_t linelen = 0, total_len = 0, rv = 0;
 	int curcpu = -1; /* cpu numbering starts at 0 */
+	int physcpu = 0;
 	unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0;
 	unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
 					irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0, guest_nice_sum = 0;
@@ -4762,7 +4819,6 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 
 	while (getline(&line, &linelen, f) != -1) {
 		ssize_t l;
-		int cpu;
 		char cpu_char[10]; /* That's a lot of cores */
 		char *c;
 		uint64_t all_used, cg_used, new_idle;
@@ -4789,9 +4845,9 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 			continue;
 		}
 
-		if (sscanf(cpu_char, "%d", &cpu) != 1)
+		if (sscanf(cpu_char, "%d", &physcpu) != 1)
 			continue;
-		if (!cpu_in_cpuset(cpu, cpuset))
+		if (!cpu_in_cpuset(physcpu, cpuset))
 			continue;
 		curcpu ++;
 
@@ -4834,7 +4890,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 
 		if (cg_cpu_usage) {
 			all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
-			cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;
+			cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system;
 
 			if (all_used >= cg_used) {
 				new_idle = idle + (all_used - cg_used);
@@ -4847,7 +4903,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 			}
 
 			l = snprintf(cache, cache_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n",
-					curcpu, cg_cpu_usage[curcpu].user, cg_cpu_usage[curcpu].system,
+					curcpu, cg_cpu_usage[physcpu].user, cg_cpu_usage[physcpu].system,
 					new_idle);
 
 			if (l < 0) {
@@ -4866,8 +4922,8 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 			cache_size -= l;
 			total_len += l;
 
-			user_sum += cg_cpu_usage[curcpu].user;
-			system_sum += cg_cpu_usage[curcpu].system;
+			user_sum += cg_cpu_usage[physcpu].user;
+			system_sum += cg_cpu_usage[physcpu].system;
 			idle_sum += new_idle;
 
 		} else {


More information about the lxc-devel mailing list