[lxc-devel] [RFC PATCH] cgroup, cpuset: add cpuset.remap_cpus

Wolfgang Bumiller w.bumiller at proxmox.com
Thu Dec 22 15:07:51 UTC 2016


Add a cpuset.remap_cpus file. Writing a cpu list to it changes the
cpuset's cpus and recursively remaps the cpus of all its descendants
to the new range.

Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
Currently, once a cpuset cgroup has a subdirectory, it's impossible to
remove a cpu without manually recursing through the cgroup file system.
The problem gets worse if you want to remap the cpus of a larger subtree.
Remapping is particularly useful with containers, and doing it manually
is problematic in that the recursion might race against the creation of
new subdirectories.

I'm not sure why this functionality isn't there yet, so I thought I'd
give it a try and send an RFC patch. I'm sure there's a reason though,
given how surprisingly small/simple the patch turned out to be, and
given that I'm rarely the first to think of a feature like that ;-)

I hope this is something we could add one way or another, and that any
required changes to the patch are within the scope of my abilities.

 include/linux/cpumask.h | 17 ++++++++++++++++
 kernel/cpuset.c         | 54 +++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 59915ea..f5487c8 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -514,6 +514,23 @@ static inline void cpumask_copy(struct cpumask *dstp,
 }
 
 /**
+ * cpumask_remap - *dstp = map(old, new)(*srcp)
+ * @dstp: the result
+ * @srcp: the input cpumask
+ * @oldp: the old mask
+ * @newp: the new mask
+ */
+static inline void cpumask_remap(struct cpumask *dstp,
+				 const struct cpumask *srcp,
+				 const struct cpumask *oldp,
+				 const struct cpumask *newp)
+{
+	bitmap_remap(cpumask_bits(dstp), cpumask_bits(srcp),
+		     cpumask_bits(oldp), cpumask_bits(newp),
+		     nr_cpumask_bits);
+}
+
+/**
  * cpumask_any - pick a "random" cpu from *srcp
  * @srcp: the input cpumask
  *
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02a8ea5..22d0cb2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -450,7 +450,8 @@ static void free_trial_cpuset(struct cpuset *trial)
  * Return 0 if valid, -errno if not.
  */
 
-static int validate_change(struct cpuset *cur, struct cpuset *trial)
+static int validate_change(struct cpuset *cur, struct cpuset *trial,
+			   bool remap)
 {
 	struct cgroup_subsys_state *css;
 	struct cpuset *c, *par;
@@ -458,11 +459,13 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
 
 	rcu_read_lock();
 
-	/* Each of our child cpusets must be a subset of us */
-	ret = -EBUSY;
-	cpuset_for_each_child(c, css, cur)
-		if (!is_cpuset_subset(c, trial))
-			goto out;
+	if (!remap) {
+		/* Each of our child cpusets must be a subset of us */
+		ret = -EBUSY;
+		cpuset_for_each_child(c, css, cur)
+			if (!is_cpuset_subset(c, trial))
+				goto out;
+	}
 
 	/* Remaining checks don't apply to root cpuset */
 	ret = 0;
@@ -925,11 +928,15 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
  * @cs: the cpuset to consider
  * @trialcs: trial cpuset
  * @buf: buffer of cpu numbers written to this cpuset
+ * @remap: recursively remap all child nodes
  */
 static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
-			  const char *buf)
+			  const char *buf, bool remap)
 {
 	int retval;
+	struct cpuset *cp;
+	struct cgroup_subsys_state *pos_css;
+	struct cpumask tempmask;
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
 	if (cs == &top_cpuset)
@@ -957,11 +964,25 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
 		return 0;
 
-	retval = validate_change(cs, trialcs);
+	retval = validate_change(cs, trialcs, remap);
 	if (retval < 0)
 		return retval;
 
 	spin_lock_irq(&callback_lock);
+	if (remap) {
+		rcu_read_lock();
+		cpuset_for_each_descendant_pre(cp, pos_css, cs) {
+			/* skip empty subtrees */
+			if (cpumask_empty(cp->cpus_allowed)) {
+				pos_css = css_rightmost_descendant(pos_css);
+				continue;
+			}
+			cpumask_copy(&tempmask, cp->cpus_allowed);
+			cpumask_remap(cp->cpus_allowed, &tempmask,
+				      cs->cpus_allowed, trialcs->cpus_allowed);
+		}
+		rcu_read_unlock();
+	}
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 	spin_unlock_irq(&callback_lock);
 
@@ -1217,7 +1238,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 		retval = 0;		/* Too easy - nothing to do */
 		goto done;
 	}
-	retval = validate_change(cs, trialcs);
+	retval = validate_change(cs, trialcs, false);
 	if (retval < 0)
 		goto done;
 
@@ -1304,7 +1325,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	else
 		clear_bit(bit, &trialcs->flags);
 
-	err = validate_change(cs, trialcs);
+	err = validate_change(cs, trialcs, false);
 	if (err < 0)
 		goto out;
 
@@ -1563,6 +1584,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
 typedef enum {
 	FILE_MEMORY_MIGRATE,
 	FILE_CPULIST,
+	FILE_REMAP_CPULIST,
 	FILE_MEMLIST,
 	FILE_EFFECTIVE_CPULIST,
 	FILE_EFFECTIVE_MEMLIST,
@@ -1695,7 +1717,10 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 
 	switch (of_cft(of)->private) {
 	case FILE_CPULIST:
-		retval = update_cpumask(cs, trialcs, buf);
+		retval = update_cpumask(cs, trialcs, buf, false);
+		break;
+	case FILE_REMAP_CPULIST:
+		retval = update_cpumask(cs, trialcs, buf, true);
 		break;
 	case FILE_MEMLIST:
 		retval = update_nodemask(cs, trialcs, buf);
@@ -1811,6 +1836,13 @@ static struct cftype files[] = {
 	},
 
 	{
+		.name = "remap_cpus",
+		.write = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * NR_CPUS),
+		.private = FILE_REMAP_CPULIST,
+	},
+
+	{
 		.name = "mems",
 		.seq_show = cpuset_common_seq_show,
 		.write = cpuset_write_resmask,
-- 
2.1.4




More information about the lxc-devel mailing list