[lxc-devel] [lxd/master] Device Infiniband resources package

tomponline on Github lxc-bot at linuxcontainers.org
Tue Aug 20 15:42:27 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 694 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190820/5a9c5632/attachment.bin>
-------------- next part --------------
From 01ae0c217c3b97c1413325a55f367cc196dab4cb Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:48:36 +0100
Subject: [PATCH 1/8] container/lxc: Removes volatile device keys when device
 is actually removed

Or when a device is updated such that its device type changes.

It will also ensure that if the device type remains the same but the new device's config replaces an old volatile key that the old volatile key is removed.

Fixes issue where hwaddr property was left even if device type changed.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/container_lxc.go | 85 ++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 39 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index b06bc5d846..0e771bbcd5 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2267,6 +2267,44 @@ func (c *containerLXC) deviceVolatileSetFunc(devName string) func(save map[strin
 	}
 }
 
+// deviceResetVolatile resets a device's volatile data when its removed or updated in such a way
+// that it is removed then added immediately afterwards.
+func (c *containerLXC) deviceResetVolatile(devName string, oldConfig, newConfig config.Device) error {
+	volatileClear := make(map[string]string)
+	devicePrefix := fmt.Sprintf("volatile.%s.", devName)
+
+	// If the device type has changed, remove all old volatile keys.
+	// This will occur if the newConfig is empty (i.e the device is actually being removed) or
+	// if the device type is being changed but keeping the same name.
+	if newConfig["type"] != oldConfig["type"] || newConfig["nictype"] != oldConfig["nictype"] {
+		for k := range c.localConfig {
+			if !strings.HasPrefix(k, fmt.Sprintf("volatile.%s.", devName)) {
+				continue
+			}
+
+			volatileClear[k] = ""
+		}
+
+		return c.VolatileSet(volatileClear)
+	}
+
+	// If the device type remains the same, then just remove any volatile keys that have
+	// the same key name present in the new config (i.e the new config is replacing the
+	// old volatile key).
+	for k := range c.localConfig {
+		if !strings.HasPrefix(k, devicePrefix) {
+			continue
+		}
+
+		devKey := strings.TrimPrefix(k, devicePrefix)
+		if _, found := newConfig[devKey]; found {
+			volatileClear[k] = ""
+		}
+	}
+
+	return c.VolatileSet(volatileClear)
+}
+
 // DeviceEventHandler actions the results of a RunConfig after an event has occurred on a device.
 func (c *containerLXC) DeviceEventHandler(runConf *device.RunConfig) error {
 	// Device events can only be processed when the container is running.
@@ -5085,45 +5123,6 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 		}
 	}
 
-	// Cleanup any leftover volatile entries
-	netNames := []string{}
-	for _, k := range c.expandedDevices.DeviceNames() {
-		v := c.expandedDevices[k]
-		if v["type"] == "nic" || v["type"] == "infiniband" {
-			netNames = append(netNames, k)
-		}
-	}
-
-	for k := range c.localConfig {
-		// We only care about volatile
-		if !strings.HasPrefix(k, "volatile.") {
-			continue
-		}
-
-		// Confirm it's a key of format volatile.<device>.<key>
-		fields := strings.SplitN(k, ".", 3)
-		if len(fields) != 3 {
-			continue
-		}
-
-		// The only device keys we care about are name and hwaddr
-		if !shared.StringInSlice(fields[2], []string{"name", "hwaddr", "host_name"}) {
-			continue
-		}
-
-		// Check if the device still exists
-		if shared.StringInSlice(fields[1], netNames) {
-			// Don't remove the volatile entry if the device doesn't have the matching field set
-			if c.expandedDevices[fields[1]][fields[2]] == "" {
-				continue
-			}
-		}
-
-		// Remove the volatile key from the in-memory configs
-		delete(c.localConfig, k)
-		delete(c.expandedConfig, k)
-	}
-
 	// Finally, apply the changes to the database
 	err = query.Retry(func() error {
 		tx, err := c.state.Cluster.Begin()
@@ -5277,6 +5276,14 @@ func (c *containerLXC) updateDevices(removeDevices map[string]config.Device, add
 		if err != nil && err != device.ErrUnsupportedDevType {
 			return errors.Wrapf(err, "Failed to remove device '%s'", k)
 		}
+
+		// Check whether we are about to add the same device back with updated config and
+		// if not, or if the device type has changed, then remove all volatile keys for
+		// this device (as its an actual removal or a device type change).
+		err = c.deviceResetVolatile(k, m, addDevices[k])
+		if err != nil {
+			return errors.Wrapf(err, "Failed to reset volatile data for device '%s'", k)
+		}
 	}
 
 	for k, m := range addDevices {

From 0911ec8a4201485ded68504ae2ebf13295afead5 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:49:41 +0100
Subject: [PATCH 2/8] container/lxc: Removes reference to non-existent
 infiniband nictype

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/container_lxc.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 0e771bbcd5..223eabd617 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -7162,7 +7162,7 @@ func (c *containerLXC) fillNetworkDevice(name string, m config.Device) (config.D
 	}
 
 	// Fill in the MAC address
-	if !shared.StringInSlice(m["nictype"], []string{"physical", "ipvlan", "infiniband", "sriov"}) && m["hwaddr"] == "" {
+	if !shared.StringInSlice(m["nictype"], []string{"physical", "ipvlan", "sriov"}) && m["hwaddr"] == "" {
 		configKey := fmt.Sprintf("volatile.%s.hwaddr", name)
 		volatileHwaddr := c.localConfig[configKey]
 		if volatileHwaddr == "" {

From 4314e1bc6314526318f3917091881d50b8089edd Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:51:17 +0100
Subject: [PATCH 3/8] device/device/utils/infiniband: Removes unused code after
 switch to resources package

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/device_utils_infiniband.go | 289 ++++++--------------------
 1 file changed, 59 insertions(+), 230 deletions(-)

diff --git a/lxd/device/device_utils_infiniband.go b/lxd/device/device_utils_infiniband.go
index 57f2f2e2c5..07d3fb7bae 100644
--- a/lxd/device/device_utils_infiniband.go
+++ b/lxd/device/device_utils_infiniband.go
@@ -1,266 +1,95 @@
 package device
 
 import (
-	"bytes"
 	"fmt"
-	"io/ioutil"
-	"os"
-	"strconv"
-	"strings"
 
 	"github.com/lxc/lxd/lxd/device/config"
 	"github.com/lxc/lxd/lxd/state"
-	"github.com/lxc/lxd/shared"
+	"github.com/lxc/lxd/shared/api"
 )
 
-// SCIB Infiniband sys class path.
-const SCIB = "/sys/class/infiniband"
-
-// SCNET Net sys class path.
-const SCNET = "/sys/class/net"
-
 // IBDevPrefix Infiniband devices prefix.
 const IBDevPrefix = "infiniband.unix"
 
-// IBF structure representing Infiniband function config.
-type IBF struct {
-	// port the function belongs to.
-	Port int64
-
-	// name of the {physical,virtual} function.
-	Fun string
-
-	// whether this is a physical (true) or virtual (false) function.
-	PF bool
-
-	// device of the function.
-	Device string
+// infinibandDevices extracts the infiniband parent device from the supplied nic list and any free
+// associated virtual functions (VFs) that are on the same card and port as the specified parent.
+// This function expects that the supplied nic list does not include VFs that are already attached
+// to running instances.
+func infinibandDevices(nics *api.ResourcesNetwork, parent string) map[string]*api.ResourcesNetworkCardPort {
+	ibDevs := make(map[string]*api.ResourcesNetworkCardPort)
+	for _, card := range nics.Cards {
+		for _, port := range card.Ports {
+			// Skip non-infiniband ports.
+			if port.Protocol != "infiniband" {
+				continue
+			}
 
-	// uverb device of the function.
-	PerPortDevices []string
-	PerFunDevices  []string
-}
+			// Skip port if not parent.
+			if port.ID != parent {
+				continue
+			}
 
-// infinibandLoadDevices inspects the system and returns information about all infiniband devices.
-func infinibandLoadDevices() (map[string]IBF, error) {
-	// check if there are any infiniband devices.
-	fscib, err := os.Open(SCIB)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return nil, os.ErrNotExist
+			// Store infiniband port info.
+			ibDevs[port.ID] = &port
 		}
-		return nil, err
-	}
-	defer fscib.Close()
-
-	// eg.g. mlx_i for i = 0, 1, ..., n
-	IBDevNames, err := fscib.Readdirnames(-1)
-	if err != nil {
-		return nil, err
-	}
-
-	if len(IBDevNames) == 0 {
-		return nil, os.ErrNotExist
-	}
 
-	// Retrieve all network device names.
-	fscnet, err := os.Open(SCNET)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return nil, os.ErrNotExist
+		// Skip virtual function (VF) extraction if SRIOV isn't supported on port.
+		if card.SRIOV == nil {
+			continue
 		}
-		return nil, err
-	}
-	defer fscnet.Close()
-
-	// Retrieve all network devices.
-	NetDevNames, err := fscnet.Readdirnames(-1)
-	if err != nil {
-		return nil, err
-	}
 
-	if len(NetDevNames) == 0 {
-		return nil, os.ErrNotExist
-	}
+		// Record if parent has been found as a physical function (PF).
+		parentDev, parentIsPF := ibDevs[parent]
 
-	UseableDevices := make(map[string]IBF)
-	for _, IBDevName := range IBDevNames {
-		IBDevResourceFile := fmt.Sprintf("/sys/class/infiniband/%s/device/resource", IBDevName)
-		IBDevResourceBuf, err := ioutil.ReadFile(IBDevResourceFile)
-		if err != nil {
-			return nil, err
-		}
-
-		for _, NetDevName := range NetDevNames {
-			NetDevResourceFile := fmt.Sprintf("/sys/class/net/%s/device/resource", NetDevName)
-			NetDevResourceBuf, err := ioutil.ReadFile(NetDevResourceFile)
-			if err != nil {
-				if os.IsNotExist(err) {
+		for _, VF := range card.SRIOV.VFs {
+			for _, port := range VF.Ports {
+				// Skip non-infiniband VFs.
+				if port.Protocol != "infiniband" {
 					continue
 				}
-				return nil, err
-			}
 
-			// If the device and the VF have the same address space
-			// they belong together.
-			if bytes.Compare(IBDevResourceBuf, NetDevResourceBuf) != 0 {
-				continue
-			}
-
-			// Now let's find the ports.
-			IBDevID := fmt.Sprintf("/sys/class/net/%s/dev_id", NetDevName)
-			IBDevPort := fmt.Sprintf("/sys/class/net/%s/dev_port", NetDevName)
-			DevIDBuf, err := ioutil.ReadFile(IBDevID)
-			if err != nil {
-				if os.IsNotExist(err) {
+				// Skip VF if parent is a PF and VF is not on same port as parent.
+				if parentIsPF && parentDev.Port != port.Port {
 					continue
 				}
-				return nil, err
-			}
-
-			DevIDString := strings.TrimSpace(string(DevIDBuf))
-			DevIDPort, err := strconv.ParseInt(DevIDString, 0, 64)
-			if err != nil {
-				return nil, err
-			}
-
-			DevPort := int64(0)
-			DevPortBuf, err := ioutil.ReadFile(IBDevPort)
-			if err != nil {
-				if !os.IsNotExist(err) {
-					return nil, err
-				}
-			} else {
-				DevPortString := strings.TrimSpace(string(DevPortBuf))
-				DevPort, err = strconv.ParseInt(DevPortString, 0, 64)
-				if err != nil {
-					return nil, err
-				}
-			}
-
-			Port := DevIDPort
-			if DevPort > DevIDPort {
-				Port = DevPort
-			}
-			Port++
-
-			NewIBF := IBF{
-				Port:   Port,
-				Fun:    IBDevName,
-				Device: NetDevName,
-			}
 
-			// Identify the /dev/infiniband/uverb<idx> device.
-			tmp := []string{}
-			IBUverb := fmt.Sprintf("/sys/class/net/%s/device/infiniband_verbs", NetDevName)
-			fuverb, err := os.Open(IBUverb)
-			if err != nil {
-				if !os.IsNotExist(err) {
-					return nil, err
-				}
-			} else {
-				defer fuverb.Close()
-
-				// Optional: retrieve all network devices.
-				tmp, err = fuverb.Readdirnames(-1)
-				if err != nil {
-					return nil, err
-				}
-
-				if len(tmp) == 0 {
-					return nil, os.ErrNotExist
-				}
-			}
-			for _, v := range tmp {
-				if strings.Index(v, "-") != -1 {
-					return nil, fmt.Errorf("Infiniband character device \"%s\" contains \"-\". Cannot guarantee unique encoding", v)
-				}
-				NewIBF.PerPortDevices = append(NewIBF.PerPortDevices, v)
-			}
-
-			// Identify the /dev/infiniband/ucm<idx> device.
-			tmp = []string{}
-			IBcm := fmt.Sprintf("/sys/class/net/%s/device/infiniband_ucm", NetDevName)
-			fcm, err := os.Open(IBcm)
-			if err != nil {
-				if !os.IsNotExist(err) {
-					return nil, err
-				}
-			} else {
-				defer fcm.Close()
-
-				// Optional: retrieve all network devices.
-				tmp, err = fcm.Readdirnames(-1)
-				if err != nil {
-					return nil, err
-				}
-
-				if len(tmp) == 0 {
-					return nil, os.ErrNotExist
-				}
-			}
-			for _, v := range tmp {
-				if strings.Index(v, "-") != -1 {
-					return nil, fmt.Errorf("Infiniband character device \"%s\" contains \"-\". Cannot guarantee unique encoding", v)
-				}
-				devPath := fmt.Sprintf("/dev/infiniband/%s", v)
-				NewIBF.PerPortDevices = append(NewIBF.PerPortDevices, devPath)
-			}
-
-			// Identify the /dev/infiniband/{issm,umad}<idx> devices.
-			IBmad := fmt.Sprintf("/sys/class/net/%s/device/infiniband_mad", NetDevName)
-			ents, err := ioutil.ReadDir(IBmad)
-			if err != nil {
-				if !os.IsNotExist(err) {
-					return nil, err
+				// Skip VF if parent isn't a PF and VF doesn't match parent name.
+				if !parentIsPF && port.ID != parent {
+					continue
 				}
-			} else {
-				for _, ent := range ents {
-					IBmadPort := fmt.Sprintf("%s/%s/port", IBmad, ent.Name())
-					portBuf, err := ioutil.ReadFile(IBmadPort)
-					if err != nil {
-						if !os.IsNotExist(err) {
-							return nil, err
-						}
-						continue
-					}
-
-					portStr := strings.TrimSpace(string(portBuf))
-					PortMad, err := strconv.ParseInt(portStr, 0, 64)
-					if err != nil {
-						return nil, err
-					}
-
-					if PortMad != NewIBF.Port {
-						continue
-					}
 
-					if strings.Index(ent.Name(), "-") != -1 {
-						return nil, fmt.Errorf("Infiniband character device \"%s\" contains \"-\". Cannot guarantee unique encoding", ent.Name())
-					}
-
-					NewIBF.PerFunDevices = append(NewIBF.PerFunDevices, ent.Name())
-				}
+				// Store infiniband VF port info.
+				ibDevs[port.ID] = &port
 			}
-
-			// Figure out whether this is a physical function.
-			IBPF := fmt.Sprintf("/sys/class/net/%s/device/physfn", NetDevName)
-			NewIBF.PF = !shared.PathExists(IBPF)
-
-			UseableDevices[NetDevName] = NewIBF
 		}
 	}
 
-	return UseableDevices, nil
+	return ibDevs
 }
 
 // infinibandAddDevices creates the UNIX devices for the provided IBF device and then configures the
 // supplied runConfig with the Cgroup rules and mount instructions to pass the device into instance.
-func infinibandAddDevices(s *state.State, devicesPath string, deviceName string, ifDev *IBF, runConf *RunConfig) error {
-	for _, unixCharDev := range ifDev.PerPortDevices {
-		destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
+func infinibandAddDevices(s *state.State, devicesPath string, deviceName string, ibDev *api.ResourcesNetworkCardPort, runConf *RunConfig) error {
+	if ibDev.Infiniband == nil {
+		return fmt.Errorf("No infiniband devices supplied")
+	}
+
+	// Add IsSM device if defined.
+	if ibDev.Infiniband.IsSMName != "" {
+		dummyDevice := config.Device{
+			"source": fmt.Sprintf("/dev/infiniband/%s", ibDev.Infiniband.IsSMName),
+		}
+
+		err := unixDeviceSetup(s, devicesPath, IBDevPrefix, deviceName, dummyDevice, false, runConf)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Add MAD device if defined.
+	if ibDev.Infiniband.MADName != "" {
 		dummyDevice := config.Device{
-			"source": destPath,
+			"source": fmt.Sprintf("/dev/infiniband/%s", ibDev.Infiniband.MADName),
 		}
 
 		err := unixDeviceSetup(s, devicesPath, IBDevPrefix, deviceName, dummyDevice, false, runConf)
@@ -269,10 +98,10 @@ func infinibandAddDevices(s *state.State, devicesPath string, deviceName string,
 		}
 	}
 
-	for _, unixCharDev := range ifDev.PerFunDevices {
-		destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
+	// Add Verb device if defined.
+	if ibDev.Infiniband.VerbName != "" {
 		dummyDevice := config.Device{
-			"source": destPath,
+			"source": fmt.Sprintf("/dev/infiniband/%s", ibDev.Infiniband.VerbName),
 		}
 
 		err := unixDeviceSetup(s, devicesPath, IBDevPrefix, deviceName, dummyDevice, false, runConf)

From c3d43ab22a48f222ef45a0c5972c6762fb005042 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:52:01 +0100
Subject: [PATCH 4/8] device/device/utils/network: Removes
 NetworkSRIOVGetFreeVFInterface

No longer shared by NIC SR-IOV and Infiniband SR-IOV devices since Infiniband switches to use resources package.

This function will be moved to nic sriov implementation internally.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/device_utils_network.go | 40 ------------------------------
 1 file changed, 40 deletions(-)

diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index dbf4c9501d..2e0b0ff13c 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -2,7 +2,6 @@ package device
 
 import (
 	"bufio"
-	"bytes"
 	"crypto/rand"
 	"encoding/hex"
 	"fmt"
@@ -655,45 +654,6 @@ func NetworkValidNetworkV6List(value string) error {
 	return nil
 }
 
-// NetworkSRIOVGetFreeVFInterface checks the contents of the VF directory to find a free VF
-// interface name that belongs to the same device and port as the parent.
-// Returns VF interface name or empty string if no free interface found.
-func NetworkSRIOVGetFreeVFInterface(reservedDevices map[string]struct{}, vfListPath string, pfDevID []byte, pfDevPort []byte) (string, error) {
-	ents, err := ioutil.ReadDir(vfListPath)
-	if err != nil {
-		return "", err
-	}
-
-	for _, ent := range ents {
-		// We can't use this VF interface as it is reserved by another device.
-		_, exists := reservedDevices[ent.Name()]
-		if exists {
-			continue
-		}
-
-		// Get VF dev_port and dev_id values.
-		vfDevPort, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/dev_port", vfListPath, ent.Name()))
-		if err != nil {
-			return "", err
-		}
-
-		vfDevID, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/dev_id", vfListPath, ent.Name()))
-		if err != nil {
-			return "", err
-		}
-
-		// Skip VFs if they do not relate to the same device and port as the parent PF.
-		// Some card vendors change the device ID for each port.
-		if bytes.Compare(pfDevPort, vfDevPort) != 0 || bytes.Compare(pfDevID, vfDevID) != 0 {
-			continue
-		}
-
-		return ent.Name(), nil
-	}
-
-	return "", nil
-}
-
 // networkParsePortRange validates a port range in the form n-n.
 func networkParsePortRange(r string) (int64, int64, error) {
 	entries := strings.Split(r, "-")

From 0669784202a5b4160556df04de9164dd19663c6b Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:53:17 +0100
Subject: [PATCH 5/8] device/infiniband/physical: Switches to use resources
 package for device probing

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/infiniband_physical.go | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/lxd/device/infiniband_physical.go b/lxd/device/infiniband_physical.go
index dc5f8ee6a5..70e21af1e5 100644
--- a/lxd/device/infiniband_physical.go
+++ b/lxd/device/infiniband_physical.go
@@ -5,6 +5,7 @@ import (
 
 	"github.com/lxc/lxd/lxd/device/config"
 	"github.com/lxc/lxd/lxd/instance"
+	"github.com/lxc/lxd/lxd/resources"
 	"github.com/lxc/lxd/shared"
 )
 
@@ -54,17 +55,21 @@ func (d *infinibandPhysical) Start() (*RunConfig, error) {
 
 	saveData := make(map[string]string)
 
-	devices, err := infinibandLoadDevices()
+	// Load network interface info.
+	nics, err := resources.GetNetwork()
 	if err != nil {
 		return nil, err
 	}
 
-	saveData["host_name"] = d.config["parent"]
-	ifDev, ok := devices[saveData["host_name"]]
-	if !ok {
-		return nil, fmt.Errorf("Specified infiniband device \"%s\" not found", saveData["host_name"])
+	// Filter the network interfaces to just infiniband devices related to parent.
+	ibDevs := infinibandDevices(nics, d.config["parent"])
+	ibDev, found := ibDevs[d.config["parent"]]
+	if !found {
+		return nil, fmt.Errorf("Specified infiniband device \"%s\" not found", d.config["parent"])
 	}
 
+	saveData["host_name"] = ibDev.ID
+
 	// Record hwaddr and mtu before potentially modifying them.
 	err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
 	if err != nil {
@@ -90,7 +95,7 @@ func (d *infinibandPhysical) Start() (*RunConfig, error) {
 	runConf := RunConfig{}
 
 	// Configure runConf with infiniband setup instructions.
-	err = infinibandAddDevices(d.state, d.instance.DevicesPath(), d.name, &ifDev, &runConf)
+	err = infinibandAddDevices(d.state, d.instance.DevicesPath(), d.name, ibDev, &runConf)
 	if err != nil {
 		return nil, err
 	}

From 876bbaee629c9da585025643dda8c245e69015b7 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:53:42 +0100
Subject: [PATCH 6/8] device/infiniband/sriov: Switches to use resources
 package for device probing

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/infiniband_sriov.go | 106 ++++++++-------------------------
 1 file changed, 26 insertions(+), 80 deletions(-)

diff --git a/lxd/device/infiniband_sriov.go b/lxd/device/infiniband_sriov.go
index 074fdf6ec3..675aacd9a4 100644
--- a/lxd/device/infiniband_sriov.go
+++ b/lxd/device/infiniband_sriov.go
@@ -2,13 +2,12 @@ package device
 
 import (
 	"fmt"
-	"io/ioutil"
-	"strconv"
-	"strings"
 
 	"github.com/lxc/lxd/lxd/device/config"
 	"github.com/lxc/lxd/lxd/instance"
+	"github.com/lxc/lxd/lxd/resources"
 	"github.com/lxc/lxd/shared"
+	"github.com/lxc/lxd/shared/api"
 )
 
 type infinibandSRIOV struct {
@@ -57,27 +56,42 @@ func (d *infinibandSRIOV) Start() (*RunConfig, error) {
 
 	saveData := make(map[string]string)
 
-	devices, err := infinibandLoadDevices()
+	// Load network interface info.
+	nics, err := resources.GetNetwork()
 	if err != nil {
 		return nil, err
 	}
 
+	// Filter the network interfaces to just infiniband devices related to parent.
+	ibDevs := infinibandDevices(nics, d.config["parent"])
+
+	// We don't count the parent as an available VF.
+	delete(ibDevs, d.config["parent"])
+
+	// Load any interfaces already allocated to other devices.
 	reservedDevices, err := instanceGetReservedDevices(d.state, d.config)
 	if err != nil {
 		return nil, err
 	}
 
-	vfDev, err := d.findFreeVirtualFunction(reservedDevices)
-	if err != nil {
-		return nil, err
+	// Remove reserved devices from available list.
+	for k := range reservedDevices {
+		delete(ibDevs, k)
+	}
+
+	if len(ibDevs) < 1 {
+		return nil, fmt.Errorf("All virtual functions on parent device are already in use")
 	}
 
-	saveData["host_name"] = vfDev
-	ifDev, ok := devices[saveData["host_name"]]
-	if !ok {
-		return nil, fmt.Errorf("Specified infiniband device \"%s\" not found", saveData["host_name"])
+	// Get first VF device that is free.
+	var vfDev *api.ResourcesNetworkCardPort
+	for _, v := range ibDevs {
+		vfDev = v
+		break
 	}
 
+	saveData["host_name"] = vfDev.ID
+
 	// Record hwaddr and mtu before potentially modifying them.
 	err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
 	if err != nil {
@@ -103,7 +117,7 @@ func (d *infinibandSRIOV) Start() (*RunConfig, error) {
 	runConf := RunConfig{}
 
 	// Configure runConf with infiniband setup instructions.
-	err = infinibandAddDevices(d.state, d.instance.DevicesPath(), d.name, &ifDev, &runConf)
+	err = infinibandAddDevices(d.state, d.instance.DevicesPath(), d.name, vfDev, &runConf)
 	if err != nil {
 		return nil, err
 	}
@@ -123,74 +137,6 @@ func (d *infinibandSRIOV) Start() (*RunConfig, error) {
 	return &runConf, nil
 }
 
-// findFreeVirtualFunction looks on the specified parent device for an unused virtual function.
-// Returns the name of the interface if found, error if not.
-func (d *infinibandSRIOV) findFreeVirtualFunction(reservedDevices map[string]struct{}) (string, error) {
-	sriovNumVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_numvfs", d.config["parent"])
-	sriovTotalVFs := fmt.Sprintf("/sys/class/net/%s/device/sriov_totalvfs", d.config["parent"])
-
-	// Verify that this is indeed a SR-IOV enabled device.
-	if !shared.PathExists(sriovTotalVFs) {
-		return "", fmt.Errorf("Parent device '%s' doesn't support SR-IOV", d.config["parent"])
-	}
-
-	// Get parent dev_port and dev_id values.
-	pfDevPort, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/net/%s/dev_port", d.config["parent"]))
-	if err != nil {
-		return "", err
-	}
-
-	pfDevID, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/net/%s/dev_id", d.config["parent"]))
-	if err != nil {
-		return "", err
-	}
-
-	// Get number of currently enabled VFs.
-	sriovNumVfsBuf, err := ioutil.ReadFile(sriovNumVFs)
-	if err != nil {
-		return "", err
-	}
-	sriovNumVfsStr := strings.TrimSpace(string(sriovNumVfsBuf))
-	sriovNum, err := strconv.Atoi(sriovNumVfsStr)
-	if err != nil {
-		return "", err
-	}
-
-	// Check if any VFs are already enabled.
-	nicName := ""
-	for i := 0; i < sriovNum; i++ {
-		if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i)) {
-			continue
-		}
-
-		// Check if VF is already in use.
-		empty, err := shared.PathIsEmpty(fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i))
-		if err != nil {
-			return "", err
-		}
-		if empty {
-			continue
-		}
-
-		vfListPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i)
-		nicName, err = NetworkSRIOVGetFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
-		if err != nil {
-			return "", err
-		}
-
-		// Found a free VF.
-		if nicName != "" {
-			break
-		}
-	}
-
-	if nicName == "" {
-		return "", fmt.Errorf("All virtual functions on parent device are already in use")
-	}
-
-	return nicName, nil
-}
-
 // Stop is run when the device is removed from the instance.
 func (d *infinibandSRIOV) Stop() (*RunConfig, error) {
 	v := d.volatileGet()

From f956fe3d483af2ee143ee202040afadd7725fc53 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:54:49 +0100
Subject: [PATCH 7/8] device/nic/sriov: Adds getFreeVFInterface after moving
 from shared utils

No longer needed by Infiniband SR-IOV device.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/nic_sriov.go | 44 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index 57186751f2..d34536e943 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -2,6 +2,7 @@ package device
 
 import (
 	"bufio"
+	"bytes"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -224,7 +225,7 @@ func (d *nicSRIOV) findFreeVirtualFunction(reservedDevices map[string]struct{})
 		}
 
 		vfListPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i)
-		nicName, err = NetworkSRIOVGetFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
+		nicName, err = d.getFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
 		if err != nil {
 			return "", 0, err
 		}
@@ -250,7 +251,7 @@ func (d *nicSRIOV) findFreeVirtualFunction(reservedDevices map[string]struct{})
 		// Use next free VF index.
 		for i := sriovNum + 1; i < sriovTotal; i++ {
 			vfListPath := fmt.Sprintf("/sys/class/net/%s/device/virtfn%d/net", d.config["parent"], i)
-			nicName, err = NetworkSRIOVGetFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
+			nicName, err = d.getFreeVFInterface(reservedDevices, vfListPath, pfDevID, pfDevPort)
 			if err != nil {
 				return "", 0, err
 			}
@@ -270,6 +271,45 @@ func (d *nicSRIOV) findFreeVirtualFunction(reservedDevices map[string]struct{})
 	return nicName, vfID, nil
 }
 
+// getFreeVFInterface checks the contents of the VF directory to find a free VF interface name that
+// belongs to the same device and port as the parent. Returns VF interface name or empty string if
+// no free interface found.
+func (d *nicSRIOV) getFreeVFInterface(reservedDevices map[string]struct{}, vfListPath string, pfDevID []byte, pfDevPort []byte) (string, error) {
+	ents, err := ioutil.ReadDir(vfListPath)
+	if err != nil {
+		return "", err
+	}
+
+	for _, ent := range ents {
+		// We can't use this VF interface as it is reserved by another device.
+		_, exists := reservedDevices[ent.Name()]
+		if exists {
+			continue
+		}
+
+		// Get VF dev_port and dev_id values.
+		vfDevPort, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/dev_port", vfListPath, ent.Name()))
+		if err != nil {
+			return "", err
+		}
+
+		vfDevID, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/dev_id", vfListPath, ent.Name()))
+		if err != nil {
+			return "", err
+		}
+
+		// Skip VFs if they do not relate to the same device and port as the parent PF.
+		// Some card vendors change the device ID for each port.
+		if bytes.Compare(pfDevPort, vfDevPort) != 0 || bytes.Compare(pfDevID, vfDevID) != 0 {
+			continue
+		}
+
+		return ent.Name(), nil
+	}
+
+	return "", nil
+}
+
 // setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original
 // properties of the physical device into voltatile for restoration on detach.
 func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[string]string) error {

From 5b68e82cb85a2da95ea260c862457df4aa53b26c Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Tue, 20 Aug 2019 14:55:31 +0100
Subject: [PATCH 8/8] test: Removes MAC tests from infiniband tests

MAC support in Infiniband needs reworking and the tests can be re-added then.

The first 12 bytes of the infiniband MAC address cannot be changed, only the last 8 bytes.

With the new Infiniband SR-IOV implementation the selected VF device is chosen at random, meaning the tests which were formerly built to expect a specific interface on lantea will now no longer work.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 test/suites/container_devices_infiniband_physical.sh |  8 ++------
 test/suites/container_devices_infiniband_sriov.sh    | 12 +++---------
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/test/suites/container_devices_infiniband_physical.sh b/test/suites/container_devices_infiniband_physical.sh
index 227af7be49..04605ed571 100644
--- a/test/suites/container_devices_infiniband_physical.sh
+++ b/test/suites/container_devices_infiniband_physical.sh
@@ -19,8 +19,6 @@ test_container_devices_ib_physical() {
   fi
 
   ctName="nt$$"
-  macRand=$(shuf -i 0-9 -n 1)
-  ctMAC1="a0:00:0a:c0:fe:80:00:00:00:00:00:00:a2:44:3c:1f:b0:15:e2:f${macRand}"
 
   # Record how many nics we started with.
   startNicCount=$(find /sys/class/net | wc -l)
@@ -30,8 +28,7 @@ test_container_devices_ib_physical() {
   lxc config device add "${ctName}" eth0 infiniband \
     nictype=physical \
     parent="${parent}" \
-    mtu=1500 \
-    hwaddr="${ctMAC1}"
+    mtu=1500
   lxc start "${ctName}"
 
   # Check host devices are created.
@@ -103,8 +100,7 @@ test_container_devices_ib_physical() {
   lxc config device add "${ctName}" eth0 infiniband \
     nictype=physical \
     parent="${parent}" \
-    mtu=1500 \
-    hwaddr="${ctMAC1}"
+    mtu=1500
 
   # Check host devices are created.
   ibDevCount=$(find "${LXD_DIR}"/devices/"${ctName}" -type c | wc -l)
diff --git a/test/suites/container_devices_infiniband_sriov.sh b/test/suites/container_devices_infiniband_sriov.sh
index 7d15675655..3cf17d1f86 100644
--- a/test/suites/container_devices_infiniband_sriov.sh
+++ b/test/suites/container_devices_infiniband_sriov.sh
@@ -19,9 +19,6 @@ test_container_devices_ib_sriov() {
   fi
 
   ctName="nt$$"
-  macRand=$(shuf -i 0-9 -n 1)
-  ctMAC1="a0:00:0a:a0:fe:80:00:00:00:00:00:00:96:29:52:03:73:4b:81:e${macRand}"
-  ctMAC2="a0:00:0a:c0:fe:80:00:00:00:00:00:00:a2:44:3c:1f:b0:15:e2:f${macRand}"
 
   # Set a known start point config
   ip link set "${parent}" up
@@ -34,13 +31,11 @@ test_container_devices_ib_sriov() {
   lxc config device add "${ctName}" eth0 infiniband \
     nictype=sriov \
     parent="${parent}" \
-    mtu=1500 \
-    hwaddr="${ctMAC1}"
+    mtu=1500
   lxc config device add "${ctName}" eth1 infiniband \
     nictype=sriov \
     parent="${parent}" \
-    mtu=1500 \
-    hwaddr="${ctMAC2}"
+    mtu=1500
   lxc start "${ctName}"
 
   # Check host devices are created.
@@ -113,8 +108,7 @@ test_container_devices_ib_sriov() {
   lxc config device add "${ctName}" eth0 infiniband \
     nictype=sriov \
     parent="${parent}" \
-    mtu=1500 \
-    hwaddr="${ctMAC1}"
+    mtu=1500
 
   # Check host devices are created.
   ibDevCount=$(find "${LXD_DIR}"/devices/"${ctName}" -type c | wc -l)


More information about the lxc-devel mailing list