[lxc-devel] [lxd/master] rebalance: don't use the cpu map from /proc/self/status
hallyn on Github
lxc-bot at linuxcontainers.org
Tue Apr 5 19:06:15 UTC 2016
From 6e8af721eb5c9af63c0a1a8b562f8f8d4c6925e6 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge.hallyn at ubuntu.com>
Date: Tue, 5 Apr 2016 13:54:32 -0500
Subject: [PATCH] rebalance: don't use the cpu map from /proc/self/status
The cpu map in /proc/self/status also includes offline cpus. So, as
discussed on IRC:
17:51 < stgraber> hallyn: so I think we should 1) get rid of
deviceGetCurrentCPUs 2) change deviceTaskBalance to read
/sys/fs/cgroup/cpuset/cpuset.effective_cpus 3) replicate it to
/lxc/cpuset.effective_cpus (not sure if we need that but I seem to remember
cpuset being weird) 4) use that list instead of deviceGetCurrentCPUs when
nothing is listed in limits.cpu
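
For reference, cpuset.effective_cpus uses the kernel's cpu list syntax,
i.e. comma-separated entries and ranges. For example (illustrative values),
a machine with cpus 0-3 and 8 online reports "0-3,8", which the new
parseCpuset() helper below expands to [0 1 2 3 8].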
Add two simple helpers in lxd/cgroup.go to get/set cgroup files.
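
To illustrate, the intended call pattern in deviceTaskBalance (this mirrors
the code in the diff below; error handling trimmed):

	// Ask the root cpuset cgroup which cpus are actually online...
	effectiveCpus, err := cGroupGet("cpuset", "/", "cpuset.effective_cpus")
	if err != nil {
		return
	}

	// ...replicate the list into the /lxc cgroup, then parse it.
	cGroupSet("cpuset", "/lxc", "cpuset.effective_cpus", effectiveCpus)
	cpus := parseCpuset(effectiveCpus)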
Signed-off-by: Serge Hallyn <serge.hallyn at ubuntu.com>
---
 lxd/cgroup.go  |  69 ++++++++++++++++++++++
 lxd/devices.go | 167 ++++++++++++++++++-------------------------------------
 2 files changed, 124 insertions(+), 112 deletions(-)
create mode 100644 lxd/cgroup.go
diff --git a/lxd/cgroup.go b/lxd/cgroup.go
new file mode 100644
index 0000000..b6fca42
--- /dev/null
+++ b/lxd/cgroup.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+	"bufio"
+	"io/ioutil"
+	"os"
+	"path"
+	"strings"
+)
+
+// getInitCgroupPath returns init's (pid 1) cgroup path for the given
+// controller, falling back to "/" if it can't be determined.
+func getInitCgroupPath(controller string) string {
+	f, err := os.Open("/proc/1/cgroup")
+	if err != nil {
+		return "/"
+	}
+	defer f.Close()
+
+	scan := bufio.NewScanner(f)
+	for scan.Scan() {
+		line := scan.Text()
+
+		// Each line is of the form "hierarchy-id:controller:path"
+		fields := strings.Split(line, ":")
+		if len(fields) != 3 {
+			return "/"
+		}
+
+		if fields[1] != controller {
+			continue
+		}
+
+		initPath := string(fields[2])
+
+		// ignore trailing /init.scope if it is there
+		dir, file := path.Split(initPath)
+		if file == "init.scope" {
+			return dir
+		}
+
+		return initPath
+	}
+
+	return "/"
+}
+
+// cGroupGet reads the given file under cgroup, relative to init's cgroup
+// path for the controller.
+func cGroupGet(controller, cgroup, file string) (string, error) {
+	initPath := getInitCgroupPath(controller)
+	fullPath := path.Join("/sys/fs/cgroup", controller, initPath, cgroup, file)
+
+	contents, err := ioutil.ReadFile(fullPath)
+	if err != nil {
+		return "", err
+	}
+
+	return strings.Trim(string(contents), "\n"), nil
+}
+
+// cGroupSet writes value to the given file under cgroup, relative to
+// init's cgroup path for the controller.
+func cGroupSet(controller, cgroup, file string, value string) error {
+	initPath := getInitCgroupPath(controller)
+	fullPath := path.Join("/sys/fs/cgroup", controller, initPath, cgroup, file)
+
+	return ioutil.WriteFile(fullPath, []byte(value), 0755)
+}
diff --git a/lxd/devices.go b/lxd/devices.go
index 4ea8a35..e92a02a 100644
--- a/lxd/devices.go
+++ b/lxd/devices.go
@@ -6,7 +6,6 @@ import (
"crypto/rand"
"encoding/hex"
"fmt"
- "io/ioutil"
"math/big"
"os"
"os/exec"
@@ -133,6 +132,46 @@ func deviceNetlinkListener() (chan []string, chan []string, error) {
 	return chCPU, chNetwork, nil
 }
 
+func parseCpuset(cpu string) []int {
+	cpus := []int{}
+	chunks := strings.Split(cpu, ",")
+	for _, chunk := range chunks {
+		if strings.Contains(chunk, "-") {
+			// Range
+			fields := strings.SplitN(chunk, "-", 2)
+			if len(fields) != 2 {
+				shared.Log.Error("Invalid cpuset value.", log.Ctx{"value": cpu})
+				continue
+			}
+
+			low, err := strconv.Atoi(fields[0])
+			if err != nil {
+				shared.Log.Error("Invalid cpuset value.", log.Ctx{"value": cpu})
+				continue
+			}
+
+			high, err := strconv.Atoi(fields[1])
+			if err != nil {
+				shared.Log.Error("Invalid cpuset value.", log.Ctx{"value": cpu})
+				continue
+			}
+
+			for i := low; i <= high; i++ {
+				cpus = append(cpus, i)
+			}
+		} else {
+			// Simple entry
+			nr, err := strconv.Atoi(chunk)
+			if err != nil {
+				shared.Log.Error("Invalid cpuset value.", log.Ctx{"value": cpu})
+				continue
+			}
+			cpus = append(cpus, nr)
+		}
+	}
+	return cpus
+}
+
 func deviceTaskBalance(d *Daemon) {
 	min := func(x, y int) int {
 		if x < y {
@@ -146,41 +185,13 @@ func deviceTaskBalance(d *Daemon) {
 		return
 	}
 
-	// Count CPUs
-	cpus := []int{}
-	dents, err := ioutil.ReadDir("/sys/bus/cpu/devices/")
+	// Get effective cpus list - those are all guaranteed to be online
+	effectiveCpus, err := cGroupGet("cpuset", "/", "cpuset.effective_cpus")
 	if err != nil {
-		shared.Log.Error("balance: Unable to list CPUs", log.Ctx{"err": err})
 		return
 	}
-
-	for _, f := range dents {
-		id := -1
-		count, err := fmt.Sscanf(f.Name(), "cpu%d", &id)
-		if count != 1 || id == -1 {
-			shared.Log.Error("balance: Bad CPU", log.Ctx{"path": f.Name()})
-			continue
-		}
-
-		onlinePath := fmt.Sprintf("/sys/bus/cpu/devices/%s/online", f.Name())
-		if !shared.PathExists(onlinePath) {
-			// CPUs without an online file are non-hotplug so are always online
-			cpus = append(cpus, id)
-			continue
-		}
-
-		online, err := ioutil.ReadFile(onlinePath)
-		if err != nil {
-			shared.Log.Error("balance: Bad CPU", log.Ctx{"path": f.Name(), "err": err})
-			continue
-		}
-
-		if online[0] == byte('0') {
-			continue
-		}
-
-		cpus = append(cpus, id)
-	}
+	cGroupSet("cpuset", "/lxc", "cpuset.effective_cpus", effectiveCpus)
+	cpus := parseCpuset(effectiveCpus)
 
 	// Iterate through the containers
 	containers, err := dbContainersList(d.db, cTypeRegular)
@@ -195,7 +206,7 @@ func deviceTaskBalance(d *Daemon) {
 		conf := c.ExpandedConfig()
 		cpu, ok := conf["limits.cpu"]
 		if !ok || cpu == "" {
-			currentCPUs, err := deviceGetCurrentCPUs()
+			currentCPUs, err := cGroupGet("cpuset", "/lxc", "cpuset.effective_cpus")
 			if err != nil {
 				shared.Debugf("Couldn't get current CPU list: %s", err)
 				cpu = fmt.Sprintf("%d", len(cpus))
@@ -215,58 +226,18 @@
 			balancedContainers[c] = count
 		} else {
 			// Pinned
-			chunks := strings.Split(cpu, ",")
-			for _, chunk := range chunks {
-				if strings.Contains(chunk, "-") {
-					// Range
-					fields := strings.SplitN(chunk, "-", 2)
-					if len(fields) != 2 {
-						shared.Log.Error("Invalid limits.cpu value.", log.Ctx{"container": c.Name(), "value": cpu})
-						continue
-					}
-
-					low, err := strconv.Atoi(fields[0])
-					if err != nil {
-						shared.Log.Error("Invalid limits.cpu value.", log.Ctx{"container": c.Name(), "value": cpu})
-						continue
-					}
-
-					high, err := strconv.Atoi(fields[1])
-					if err != nil {
-						shared.Log.Error("Invalid limits.cpu value.", log.Ctx{"container": c.Name(), "value": cpu})
-						continue
-					}
+			containerCpus := parseCpuset(cpu)
+			for _, nr := range containerCpus {
+				// Skip cpus that aren't in the effective (online) set
+				if !shared.IntInSlice(nr, cpus) {
+					continue
+				}
 
-					for i := low; i <= high; i++ {
-						if !shared.IntInSlice(i, cpus) {
-							continue
-						}
-
-						_, ok := fixedContainers[i]
-						if ok {
-							fixedContainers[i] = append(fixedContainers[i], c)
-						} else {
-							fixedContainers[i] = []container{c}
-						}
-					}
+				_, ok := fixedContainers[nr]
+				if ok {
+					fixedContainers[nr] = append(fixedContainers[nr], c)
 				} else {
-					// Simple entry
-					nr, err := strconv.Atoi(chunk)
-					if err != nil {
-						shared.Log.Error("Invalid limits.cpu value.", log.Ctx{"container": c.Name(), "value": cpu})
-						continue
-					}
-
-					if !shared.IntInSlice(nr, cpus) {
-						continue
-					}
-
-					_, ok := fixedContainers[nr]
-					if ok {
-						fixedContainers[nr] = append(fixedContainers[nr], c)
-					} else {
-						fixedContainers[nr] = []container{c}
-					}
+					fixedContainers[nr] = []container{c}
 				}
 			}
 		}
@@ -333,34 +304,6 @@
 	}
 }
 
-func deviceGetCurrentCPUs() (string, error) {
-	// Open /proc/self/status
-	f, err := os.Open("/proc/self/status")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	// Read it line by line
-	scan := bufio.NewScanner(f)
-	for scan.Scan() {
-		line := scan.Text()
-
-		// We only care about MemTotal
-		if !strings.HasPrefix(line, "Cpus_allowed_list:") {
-			continue
-		}
-
-		// Extract the before last (value) and last (unit) fields
-		fields := strings.Split(line, "\t")
-		value := fields[len(fields)-1]
-
-		return value, nil
-	}
-
-	return "", fmt.Errorf("Couldn't find cpus_allowed_list")
-}
-
 func deviceNetworkPriority(d *Daemon, netif string) {
 	// Don't bother running when CGroup support isn't there
 	if !cgNetPrioController {