[lxc-devel] [lxd/master] api: add "infiniband" api extension

brauner on Github lxc-bot at linuxcontainers.org
Fri Dec 1 22:56:00 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 364 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20171201/e99aee5e/attachment.bin>
-------------- next part --------------
From 3418803b9b0be3f0509b096d2dfd4ce2c43c9051 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 17 Nov 2017 15:56:11 +0100
Subject: [PATCH 1/4] devices: add "infiniband" device type

Closes #3983.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/container.go  | 29 ++++++++++++++++++++++++++++-
 lxd/db/devices.go |  4 ++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/lxd/container.go b/lxd/container.go
index 4515d9c3a..4d50dbecf 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -199,6 +199,21 @@ func containerValidDeviceConfigKey(t, k string) bool {
 		default:
 			return false
 		}
+	case "infiniband":
+		switch k {
+		case "hwaddr":
+			return true
+		case "mtu":
+			return true
+		case "name":
+			return true
+		case "nictype":
+			return true
+		case "parent":
+			return true
+		default:
+			return false
+		}
 	case "none":
 		return false
 	default:
@@ -290,7 +305,7 @@ func containerValidDevices(db *db.Node, devices types.Devices, profile bool, exp
 			return fmt.Errorf("Missing device type for device '%s'", name)
 		}
 
-		if !shared.StringInSlice(m["type"], []string{"none", "nic", "disk", "unix-char", "unix-block", "usb", "gpu"}) {
+		if !shared.StringInSlice(m["type"], []string{"disk", "gpu", "infiniband", "nic", "none", "unix-block", "unix-char", "usb"}) {
 			return fmt.Errorf("Invalid device type for device '%s'", name)
 		}
 
@@ -312,6 +327,18 @@ func containerValidDevices(db *db.Node, devices types.Devices, profile bool, exp
 			if shared.StringInSlice(m["nictype"], []string{"bridged", "macvlan", "physical", "sriov"}) && m["parent"] == "" {
 				return fmt.Errorf("Missing parent for %s type nic", m["nictype"])
 			}
+		} else if m["type"] == "infiniband" {
+			if m["nictype"] == "" {
+				return fmt.Errorf("Missing nic type")
+			}
+
+			if !shared.StringInSlice(m["nictype"], []string{"physical", "sriov"}) {
+				return fmt.Errorf("Bad nic type: %s", m["nictype"])
+			}
+
+			if m["parent"] == "" {
+				return fmt.Errorf("Missing parent for %s type nic", m["nictype"])
+			}
 		} else if m["type"] == "disk" {
 			if !expanded && !shared.StringInSlice(m["path"], diskDevicePaths) {
 				diskDevicePaths = append(diskDevicePaths, m["path"])
diff --git a/lxd/db/devices.go b/lxd/db/devices.go
index 6911f4360..f246a3b17 100644
--- a/lxd/db/devices.go
+++ b/lxd/db/devices.go
@@ -25,6 +25,8 @@ func dbDeviceTypeToString(t int) (string, error) {
 		return "usb", nil
 	case 6:
 		return "gpu", nil
+	case 7:
+		return "infiniband", nil
 	default:
 		return "", fmt.Errorf("Invalid device type %d", t)
 	}
@@ -46,6 +48,8 @@ func dbDeviceTypeToInt(t string) (int, error) {
 		return 5, nil
 	case "gpu":
 		return 6, nil
+	case "infiniband":
+		return 7, nil
 	default:
 		return -1, fmt.Errorf("Invalid device type %s", t)
 	}

From 9aefff736809b0bf259433427456858576a3d3cf Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Tue, 21 Nov 2017 15:45:51 +0100
Subject: [PATCH 2/4] devices: implement infiniband device detection

We use a hashmap where the key is the {P,V}F name as seen on the host. This way
we can later cheaply retrieve all info based on the "parent" or "host_name"
property of the relevant infiniband device.

Closes #3983.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/devices.go | 225 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 225 insertions(+)

diff --git a/lxd/devices.go b/lxd/devices.go
index bda6fdfbb..241fd5104 100644
--- a/lxd/devices.go
+++ b/lxd/devices.go
@@ -1359,3 +1359,228 @@ func deviceLoadUsb() ([]usbDevice, error) {
 
 	return result, nil
 }
+
+const SCIB string = "/sys/class/infiniband"
+const SCNET string = "/sys/class/net"
+
+type IBF struct {
+	// port the function belongs to
+	Port int64
+
+	// name of the {physical,virtual} function
+	Fun string
+
+	// whether this is a physical (true) or virtual (false) function
+	PF bool
+
+	// device of the function
+	Device string
+
+	// /dev/infiniband character devices: uverbs/ucm are per-port, issm/umad are per-function
+	PerPortDevices []string
+	PerFunDevices  []string
+}
+
+func deviceLoadInfiniband() (map[string]IBF, error) {
+	// check if there are any infiniband devices
+	fscib, err := os.Open(SCIB)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, os.ErrNotExist
+		}
+		return nil, err
+	}
+	defer fscib.Close()
+
+	// e.g. mlx_i for i = 0, 1, ..., n
+	IBDevNames, err := fscib.Readdirnames(-1)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(IBDevNames) == 0 {
+		return nil, os.ErrNotExist
+	}
+
+	// retrieve all network device names
+	fscnet, err := os.Open(SCNET)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, os.ErrNotExist
+		}
+		return nil, err
+	}
+	defer fscnet.Close()
+
+	// retrieve all network devices
+	NetDevNames, err := fscnet.Readdirnames(-1)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(NetDevNames) == 0 {
+		return nil, os.ErrNotExist
+	}
+
+	var UseableDevices map[string]IBF
+	for _, IBDevName := range IBDevNames {
+		IBDevResourceFile := fmt.Sprintf("/sys/class/infiniband/%s/device/resource", IBDevName)
+		IBDevResourceBuf, err := ioutil.ReadFile(IBDevResourceFile)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, NetDevName := range NetDevNames {
+			NetDevResourceFile := fmt.Sprintf("/sys/class/net/%s/device/resource", NetDevName)
+			NetDevResourceBuf, err := ioutil.ReadFile(NetDevResourceFile)
+			if err != nil {
+				if os.IsNotExist(err) {
+					continue
+				}
+				return nil, err
+			}
+
+			// If the device and the VF have the same address space
+			// they belong together.
+			if bytes.Compare(IBDevResourceBuf, NetDevResourceBuf) != 0 {
+				continue
+			}
+
+			// Now let's find the ports.
+			IBDevID := fmt.Sprintf("/sys/class/net/%s/dev_id", NetDevName)
+			IBDevPort := fmt.Sprintf("/sys/class/net/%s/dev_port", NetDevName)
+			DevIDBuf, err := ioutil.ReadFile(IBDevID)
+			if err != nil {
+				if os.IsNotExist(err) {
+					continue
+				}
+				return nil, err
+			}
+
+			DevIDString := strings.TrimSpace(string(DevIDBuf))
+			DevIDPort, err := strconv.ParseInt(DevIDString, 0, 64)
+			if err != nil {
+				return nil, err
+			}
+
+			DevPort := int64(0)
+			DevPortBuf, err := ioutil.ReadFile(IBDevPort)
+			if err != nil {
+				if !os.IsNotExist(err) {
+					return nil, err
+				}
+			} else {
+				DevPortString := strings.TrimSpace(string(DevPortBuf))
+				DevPort, err = strconv.ParseInt(DevPortString, 0, 64)
+				if err != nil {
+					return nil, err
+				}
+			}
+
+			Port := DevIDPort
+			if DevPort > DevIDPort {
+				Port = DevPort
+			}
+			Port++
+
+			NewIBF := IBF{
+				Port:   Port,
+				Fun:    IBDevName,
+				Device: NetDevName,
+			}
+
+			// identify the /dev/infiniband/uverb<idx> device
+			tmp := []string{}
+			IBUverb := fmt.Sprintf("/sys/class/net/%s/device/infiniband_verbs", NetDevName)
+			fuverb, err := os.Open(IBUverb)
+			if err != nil {
+				if !os.IsNotExist(err) {
+					return nil, os.ErrNotExist
+				}
+			} else {
+				defer fuverb.Close()
+
+				// optional: retrieve the infiniband_verbs (uverbs) entry
+				tmp, err = fuverb.Readdirnames(-1)
+				if err != nil {
+					return nil, err
+				}
+
+				if len(tmp) != 1 {
+					return nil, os.ErrNotExist
+				}
+			}
+			for _, v := range tmp {
+				NewIBF.PerPortDevices = append(NewIBF.PerPortDevices, v)
+			}
+
+			// identify the /dev/infiniband/ucm<idx> device
+			tmp = []string{}
+			IBcm := fmt.Sprintf("/sys/class/net/%s/device/infiniband_ucm", NetDevName)
+			fcm, err := os.Open(IBcm)
+			if err != nil {
+				if !os.IsNotExist(err) {
+					return nil, os.ErrNotExist
+				}
+			} else {
+				defer fcm.Close()
+
+				// optional: retrieve the infiniband_ucm (ucm) entry
+				tmp, err = fcm.Readdirnames(-1)
+				if err != nil {
+					return nil, err
+				}
+
+				if len(tmp) != 1 {
+					return nil, os.ErrNotExist
+				}
+			}
+			for _, v := range tmp {
+				devPath := fmt.Sprintf("/dev/infiniband/%s", v)
+				NewIBF.PerPortDevices = append(NewIBF.PerPortDevices, devPath)
+			}
+
+			// identify the /dev/infiniband/{issm,umad}<idx> devices
+			tmp = []string{}
+			IBmad := fmt.Sprintf("/sys/class/net/%s/device/infiniband_mad", NetDevName)
+			ents, err := ioutil.ReadDir(IBmad)
+			if err != nil {
+				if !os.IsNotExist(err) {
+					return nil, err
+				}
+			} else {
+				for _, ent := range ents {
+					IBmadPort := fmt.Sprintf("%s/%s/port", IBmad, ent.Name())
+					portBuf, err := ioutil.ReadFile(IBmadPort)
+					if err != nil {
+						if !os.IsNotExist(err) {
+							return nil, err
+						}
+						continue
+					}
+
+					portStr := strings.TrimSpace(string(portBuf))
+					PortMad, err := strconv.ParseInt(portStr, 0, 64)
+					if err != nil {
+						return nil, err
+					}
+
+					if PortMad != NewIBF.Port {
+						continue
+					}
+
+					NewIBF.PerFunDevices = append(NewIBF.PerFunDevices, ent.Name())
+				}
+			}
+
+			// figure out whether this is a physical function
+			IBPF := fmt.Sprintf("/sys/class/net/%s/device/physfn", NetDevName)
+			NewIBF.PF = !shared.PathExists(IBPF)
+
+			UseableDevices[NetDevName] = NewIBF
+		}
+	}
+
+	// return all usable infiniband functions, keyed by network device name
+	return UseableDevices, nil
+}

From da108354be3218e2e1a6b7fa6e30cfbfd78a0909 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Wed, 22 Nov 2017 13:29:08 +0100
Subject: [PATCH 3/4] container_lxc: handle infiniband devices

Closes #3983.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/container_lxc.go | 483 +++++++++++++++++++++++++++++++++++++++++++++------
 lxd/devices.go       |   6 +-
 shared/util.go       |   9 +
 3 files changed, 439 insertions(+), 59 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 5f58821e4..e58145a20 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1352,7 +1352,7 @@ func (c *containerLXC) initLXC(config bool) error {
 			if err != nil {
 				return err
 			}
-		} else if m["type"] == "nic" {
+		} else if m["type"] == "nic" || m["type"] == "infiniband" {
 			// Fill in some fields from volatile
 			m, err = c.fillNetworkDevice(k, m)
 			if err != nil {
@@ -1448,6 +1448,7 @@ func (c *containerLXC) initLXC(config bool) error {
 					return err
 				}
 			}
+
 			// bump network index
 			networkidx++
 		} else if m["type"] == "disk" {
@@ -1633,7 +1634,7 @@ func (c *containerLXC) expandDevices() error {
 
 // setupUnixDevice() creates the unix device and sets up the necessary low-level
 // liblxc configuration items.
-func (c *containerLXC) setupUnixDevice(name string, dev types.Device, major int, minor int, path string, createMustSucceed bool) error {
+func (c *containerLXC) setupUnixDevice(prefix string, dev types.Device, major int, minor int, path string, createMustSucceed bool) error {
 	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
 		err := lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("c %d:%d rwm", major, minor))
 		if err != nil {
@@ -1650,9 +1651,9 @@ func (c *containerLXC) setupUnixDevice(name string, dev types.Device, major int,
 	temp["minor"] = fmt.Sprintf("%d", minor)
 	temp["path"] = path
 
-	paths, err := c.createUnixDevice(name, temp)
+	paths, err := c.createUnixDevice(prefix, temp)
 	if err != nil {
-		logger.Debug("failed to create device", log.Ctx{"err": err, "device": name})
+		logger.Debug("failed to create device", log.Ctx{"err": err, "device": prefix})
 		if createMustSucceed {
 			return err
 		}
@@ -1840,12 +1841,14 @@ func (c *containerLXC) startCommon() (string, error) {
 	}
 
 	// Cleanup any existing leftover devices
-	c.removeUnixDevices()
+	c.removeUnixDevices("")
 	c.removeDiskDevices()
 	c.removeNetworkFilters()
 
 	var usbs []usbDevice
 	var gpus []gpuDevice
+	var infiniband map[string]IBF
+	var sriov []string
 	var nvidiaDevices []nvidiaGpuDevices
 	diskDevices := map[string]types.Device{}
 
@@ -1854,7 +1857,7 @@ func (c *containerLXC) startCommon() (string, error) {
 		m := c.expandedDevices[k]
 		if shared.StringInSlice(m["type"], []string{"unix-char", "unix-block"}) {
 			// Unix device
-			paths, err := c.createUnixDevice(k, m)
+			paths, err := c.createUnixDevice(fmt.Sprintf("unix.%s", k), m)
 			if err != nil {
 				return "", err
 			}
@@ -1884,7 +1887,7 @@ func (c *containerLXC) startCommon() (string, error) {
 					continue
 				}
 
-				err := c.setupUnixDevice(k, m, usb.major, usb.minor, usb.path, shared.IsTrue(m["required"]))
+				err := c.setupUnixDevice(fmt.Sprintf("unix.%s", k), m, usb.major, usb.minor, usb.path, shared.IsTrue(m["required"]))
 				if err != nil {
 					return "", err
 				}
@@ -1909,7 +1912,7 @@ func (c *containerLXC) startCommon() (string, error) {
 
 				found = true
 
-				err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
+				err := c.setupUnixDevice(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path, true)
 				if err != nil {
 					return "", err
 				}
@@ -1918,7 +1921,7 @@ func (c *containerLXC) startCommon() (string, error) {
 					continue
 				}
 
-				err = c.setupUnixDevice(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path, true)
+				err = c.setupUnixDevice(fmt.Sprintf("unix.%s", k), m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path, true)
 				if err != nil {
 					return "", err
 				}
@@ -1928,7 +1931,7 @@ func (c *containerLXC) startCommon() (string, error) {
 
 			if sawNvidia {
 				for _, gpu := range nvidiaDevices {
-					err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
+					err := c.setupUnixDevice(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path, true)
 					if err != nil {
 						return "", err
 					}
@@ -1944,7 +1947,15 @@ func (c *containerLXC) startCommon() (string, error) {
 			if m["path"] != "/" {
 				diskDevices[k] = m
 			}
-		} else if m["type"] == "nic" {
+		} else if m["type"] == "nic" || m["type"] == "infiniband" {
+			var err error
+			if m["type"] == "infiniband" && infiniband == nil {
+				infiniband, err = deviceLoadInfiniband()
+				if err != nil {
+					return "", err
+				}
+			}
+
 			networkKeyPrefix := "lxc.net"
 			if !util.RuntimeLiblxcVersionAtLeast(2, 1, 0) {
 				networkKeyPrefix = "lxc.network"
@@ -1957,18 +1968,24 @@ func (c *containerLXC) startCommon() (string, error) {
 
 			networkidx := -1
 			reserved := []string{}
-			for _, k := range c.expandedDevices.DeviceNames() {
-				m := c.expandedDevices[k]
-				if m["type"] != "nic" {
+			for _, dName := range c.expandedDevices.DeviceNames() {
+				m := c.expandedDevices[dName]
+				if m["type"] != "nic" && m["type"] != "infiniband" {
 					continue
 				}
 				networkidx++
 
+				if shared.StringInSlice(dName, sriov) {
+					continue
+				} else {
+					sriov = append(sriov, dName)
+				}
+
 				if m["nictype"] != "sriov" {
 					continue
 				}
 
-				m, err = c.fillSriovNetworkDevice(k, m, reserved)
+				m, err = c.fillSriovNetworkDevice(dName, m, reserved)
 				if err != nil {
 					return "", err
 				}
@@ -1985,6 +2002,42 @@ func (c *containerLXC) startCommon() (string, error) {
 				if err != nil {
 					return "", err
 				}
+
+				if m["type"] == "infiniband" {
+					key := m["host_name"]
+					ifDev, ok := infiniband[key]
+					if !ok {
+						return "", fmt.Errorf("Specified infiniband device \"%s\" not found", key)
+					}
+
+					err := c.addInfinibandDevicesPerFun(dName, &ifDev, false)
+					if err != nil {
+						return "", err
+					}
+
+					err = c.addInfinibandDevicesPerPort(dName, &ifDev, false)
+					if err != nil {
+						return "", err
+					}
+				}
+			}
+
+			if m["type"] == "infiniband" && m["nictype"] == "physical" {
+				key := m["parent"]
+				ifDev, ok := infiniband[key]
+				if !ok {
+					return "", fmt.Errorf("Specified infiniband device \"%s\" not found", key)
+				}
+
+				err := c.addInfinibandDevicesPerFun(k, &ifDev, false)
+				if err != nil {
+					return "", err
+				}
+
+				err = c.addInfinibandDevicesPerPort(k, &ifDev, false)
+				if err != nil {
+					return "", err
+				}
 			}
 
 			if m["nictype"] == "bridged" && shared.IsTrue(m["security.mac_filtering"]) {
@@ -2307,7 +2360,7 @@ func (c *containerLXC) OnStart() error {
 	// Apply network limits
 	for _, name := range c.expandedDevices.DeviceNames() {
 		m := c.expandedDevices[name]
-		if m["type"] != "nic" {
+		if m["type"] != "nic" && m["type"] != "infiniband" {
 			continue
 		}
 
@@ -2535,7 +2588,7 @@ func (c *containerLXC) OnStop(target string) error {
 		}
 
 		// Clean all the unix devices
-		err = c.removeUnixDevices()
+		err = c.removeUnixDevices("")
 		if err != nil {
 			logger.Error("Unable to remove unix devices", log.Ctx{"container": c.Name(), "err": err})
 		}
@@ -2912,7 +2965,7 @@ func (c *containerLXC) Restore(sourceContainer container, stateful bool) error {
 
 func (c *containerLXC) cleanup() {
 	// Unmount any leftovers
-	c.removeUnixDevices()
+	c.removeUnixDevices("")
 	c.removeDiskDevices()
 	c.removeNetworkFilters()
 
@@ -2996,7 +3049,7 @@ func (c *containerLXC) Delete() error {
 	// Update network files
 	networkUpdateStatic(c.state, "")
 	for k, m := range c.expandedDevices {
-		if m["type"] != "nic" || m["nictype"] != "bridged" || (m["ipv4.address"] == "" && m["ipv6.address"] == "") {
+		if (m["type"] != "nic" && m["type"] != "infiniband") || m["nictype"] != "bridged" || (m["ipv4.address"] == "" && m["ipv6.address"] == "") {
 			continue
 		}
 
@@ -3856,12 +3909,14 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 
 		var usbs []usbDevice
 		var gpus []gpuDevice
+		var infiniband map[string]IBF
+		infinibandUnixDevices := make(map[string]bool)
 		var nvidiaDevices []nvidiaGpuDevices
 
 		// Live update the devices
 		for k, m := range removeDevices {
 			if shared.StringInSlice(m["type"], []string{"unix-char", "unix-block"}) {
-				err = c.removeUnixDevice(k, m)
+				err = c.removeUnixDevice(fmt.Sprintf("unix.%s", k), m)
 				if err != nil {
 					return err
 				}
@@ -3870,11 +3925,76 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 				if err != nil {
 					return err
 				}
-			} else if m["type"] == "nic" {
+			} else if m["type"] == "nic" || m["type"] == "infiniband" {
 				err = c.removeNetworkDevice(k, m)
 				if err != nil {
 					return err
 				}
+
+				// Wipe all per-{P,V}F unix devices associated
+				// with this infiniband device.
+				if m["type"] == "infiniband" {
+					// Check that we indeed have devices to remove
+					empty, _ := shared.PathIsEmpty(c.DevicesPath())
+					if empty {
+						return nil
+					}
+
+					// load all devices
+					dents, err := ioutil.ReadDir(c.DevicesPath())
+					if err != nil {
+						return err
+					}
+
+					// go through all the devices
+					for _, f := range dents {
+						// only handle infiniband.unix.<device-name>.
+						prefix := fmt.Sprintf("infiniband.unix.%s.", k)
+
+						// skip non infiniband.unix.<device-name> devices
+						devName := f.Name()
+						if !strings.HasPrefix(devName, prefix) {
+							continue
+						}
+
+						// if it's a regular file it's a per-port device
+						path := filepath.Join(c.DevicesPath(), devName)
+						if shared.IsRegularPath(path) {
+							// wipe the mock device
+							err := os.Remove(path)
+							if err != nil {
+								return err
+							}
+
+							// parse out the device name to get at the pristine per-port device name
+
+							recordDevName := strings.Replace(devName, fmt.Sprintf(".%s.", k), ".", 1)
+							// record the pristine per-port device name
+							_, ok := infinibandUnixDevices[recordDevName]
+							if !ok {
+								infinibandUnixDevices[recordDevName] = false
+							}
+
+							// defer wiping per-port devices after all infiniband devices scheduled
+							// for removal are gone
+							continue
+						}
+
+						// remove per {P,V}F infiniband.unix.<device-name> devices
+						tmp := strings.TrimPrefix(devName, fmt.Sprintf("infiniband.unix.%s.", k))
+						idx := strings.Index(tmp, ".")
+						charDevName := tmp[:idx]
+
+						dummyDevice := types.Device{
+							"path": fmt.Sprintf("/dev/infiniband/%s", charDevName),
+						}
+
+						err = c.removeUnixDevice(fmt.Sprintf("infiniband.unix.%s.%s", k, charDevName), dummyDevice)
+						if err != nil {
+							return err
+						}
+					}
+				}
 			} else if m["type"] == "usb" {
 				if usbs == nil {
 					usbs, err = deviceLoadUsb()
@@ -3889,7 +4009,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						continue
 					}
 
-					err := c.removeUnixDeviceNum(k, m, usb.major, usb.minor, usb.path)
+					err := c.removeUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, usb.major, usb.minor, usb.path)
 					if err != nil {
 						return err
 					}
@@ -3910,7 +4030,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						continue
 					}
 
-					err := c.removeUnixDeviceNum(k, m, gpu.major, gpu.minor, gpu.path)
+					err := c.removeUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path)
 					if err != nil {
 						logger.Error("Failed to remove GPU device.", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 						return err
@@ -3920,7 +4040,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						continue
 					}
 
-					err = c.removeUnixDeviceNum(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path)
+					err = c.removeUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path)
 					if err != nil {
 						logger.Error("Failed to remove GPU device.", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 						return err
@@ -3942,7 +4062,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						if !c.deviceExists(k, gpu.path) {
 							continue
 						}
-						err = c.removeUnixDeviceNum(k, m, gpu.major, gpu.minor, gpu.path)
+						err = c.removeUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path)
 						if err != nil {
 							logger.Error("Failed to remove GPU device.", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 							return err
@@ -3952,21 +4072,109 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 			}
 		}
 
+		// wipe all per-port infiniband unix devices
+		dents, err := ioutil.ReadDir(c.DevicesPath())
+		if err != nil {
+			return err
+		}
+
+		for devName, m := range c.expandedDevices {
+			if m["type"] != "infiniband" {
+				continue
+			}
+
+			ifDevPrefix := fmt.Sprintf("infiniband.unix.%s.", devName)
+			for _, f := range dents {
+				// we only care about regular files that indicate a dependency on a per-port device
+				if !strings.HasPrefix(f.Name(), ifDevPrefix) || !f.Mode().IsRegular() {
+					continue
+				}
+
+				cmp := strings.Replace(f.Name(), fmt.Sprintf(".%s.", devName), ".", 1)
+				v, ok := infinibandUnixDevices[cmp]
+				if ok && !v {
+					infinibandUnixDevices[cmp] = true
+					break
+				}
+			}
+		}
+
+		for k, v := range infinibandUnixDevices {
+			if v {
+				continue
+			}
+
+			tmp := strings.TrimPrefix(k, "infiniband.unix.")
+			idx := strings.Index(tmp, ".")
+			if idx == -1 {
+				return fmt.Errorf("Invalid infiniband device name detected")
+			}
+			funName := tmp[:idx]
+
+			tmp = tmp[idx+1:]
+			idx = strings.Index(tmp, ".")
+			if idx == -1 {
+				return fmt.Errorf("Invalid infiniband device name detected")
+			}
+			charDevName := tmp[:idx]
+
+			dummyDevice := types.Device{
+				"path": fmt.Sprintf("/dev/infiniband/%s", charDevName),
+			}
+
+			err = c.removeUnixDevice(fmt.Sprintf("infiniband.unix.%s.%s", funName, charDevName), dummyDevice)
+			if err != nil {
+				return err
+			}
+		}
+
 		diskDevices := map[string]types.Device{}
 
 		for k, m := range addDevices {
 			if shared.StringInSlice(m["type"], []string{"unix-char", "unix-block"}) {
-				err = c.insertUnixDevice(k, m)
+				err = c.insertUnixDevice(fmt.Sprintf("unix.%s", k), m)
 				if err != nil {
 					return err
 				}
 			} else if m["type"] == "disk" && m["path"] != "/" {
 				diskDevices[k] = m
-			} else if m["type"] == "nic" {
+			} else if m["type"] == "nic" || m["type"] == "infiniband" {
+				var err error
+				if m["type"] == "infiniband" && infiniband == nil {
+					infiniband, err = deviceLoadInfiniband()
+					if err != nil {
+						return err
+					}
+				}
+
 				err = c.insertNetworkDevice(k, m)
 				if err != nil {
 					return err
 				}
+
+				// Plug in all character devices
+				if m["type"] == "infiniband" {
+					key := m["parent"]
+					if m["nictype"] == "sriov" {
+						key = m["host_name"]
+					}
+
+					ifDev, ok := infiniband[key]
+					if !ok {
+						return fmt.Errorf("Specified infiniband device \"%s\" not found", key)
+					}
+
+					err := c.addInfinibandDevicesPerFun(k, &ifDev, true)
+					if err != nil {
+						return err
+					}
+
+					err = c.addInfinibandDevicesPerPort(k, &ifDev, true)
+					if err != nil {
+						return err
+					}
+
+				}
 			} else if m["type"] == "usb" {
 				if usbs == nil {
 					usbs, err = deviceLoadUsb()
@@ -3980,7 +4188,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						continue
 					}
 
-					err = c.insertUnixDeviceNum(k, m, usb.major, usb.minor, usb.path)
+					err = c.insertUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, usb.major, usb.minor, usb.path)
 					if err != nil {
 						logger.Error("failed to insert usb device", log.Ctx{"err": err, "usb": usb, "container": c.Name()})
 					}
@@ -4005,7 +4213,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 
 					found = true
 
-					err = c.insertUnixDeviceNum(k, m, gpu.major, gpu.minor, gpu.path)
+					err = c.insertUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path)
 					if err != nil {
 						logger.Error("Failed to insert GPU device.", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 						return err
@@ -4015,7 +4223,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						continue
 					}
 
-					err = c.insertUnixDeviceNum(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path)
+					err = c.insertUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path)
 					if err != nil {
 						logger.Error("Failed to insert GPU device.", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 						return err
@@ -4029,7 +4237,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 						if c.deviceExists(k, gpu.path) {
 							continue
 						}
-						err = c.insertUnixDeviceNum(k, m, gpu.major, gpu.minor, gpu.path)
+						err = c.insertUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path)
 						if err != nil {
 							logger.Error("failed to insert GPU device", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 							return err
@@ -4054,7 +4262,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 		for k, m := range updateDevices {
 			if m["type"] == "disk" {
 				updateDiskLimit = true
-			} else if m["type"] == "nic" {
+			} else if m["type"] == "nic" || m["type"] == "infiniband" {
 				needsUpdate := false
 				for _, v := range containerNetworkLimitKeys {
 					needsUpdate = shared.StringInSlice(v, updateDiff)
@@ -4108,7 +4316,7 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 	netNames := []string{}
 	for _, k := range c.expandedDevices.DeviceNames() {
 		v := c.expandedDevices[k]
-		if v["type"] == "nic" {
+		if v["type"] == "nic" || v["type"] == "infiniband" {
 			netNames = append(netNames, k)
 		}
 	}
@@ -5601,16 +5809,16 @@ func (c *containerLXC) removeMount(mount string) error {
 }
 
 // Check if the unix device already exists.
-func (c *containerLXC) deviceExists(name string, path string) bool {
+func (c *containerLXC) deviceExists(prefix string, path string) bool {
 	relativeDestPath := strings.TrimPrefix(path, "/")
-	devName := fmt.Sprintf("unix.%s.%s", name, strings.Replace(relativeDestPath, "/", "-", -1))
+	devName := fmt.Sprintf("%s.%s", prefix, strings.Replace(relativeDestPath, "/", "-", -1))
 	devPath := filepath.Join(c.DevicesPath(), devName)
 
 	return shared.PathExists(devPath)
 }
 
 // Unix devices handling
-func (c *containerLXC) createUnixDevice(name string, m types.Device) ([]string, error) {
+func (c *containerLXC) createUnixDevice(prefix string, m types.Device) ([]string, error) {
 	var err error
 	var major, minor int
 
@@ -5697,7 +5905,7 @@ func (c *containerLXC) createUnixDevice(name string, m types.Device) ([]string,
 		destPath = m["source"]
 	}
 	relativeDestPath := strings.TrimPrefix(destPath, "/")
-	devName := fmt.Sprintf("unix.%s.%s", name, strings.Replace(relativeDestPath, "/", "-", -1))
+	devName := fmt.Sprintf("%s.%s", prefix, strings.Replace(relativeDestPath, "/", "-", -1))
 	devPath := filepath.Join(c.DevicesPath(), devName)
 
 	// Create the new entry
@@ -5743,14 +5951,14 @@ func (c *containerLXC) createUnixDevice(name string, m types.Device) ([]string,
 	return []string{devPath, relativeDestPath}, nil
 }
 
-func (c *containerLXC) insertUnixDevice(name string, m types.Device) error {
+func (c *containerLXC) insertUnixDevice(prefix string, m types.Device) error {
 	// Check that the container is running
 	if !c.IsRunning() {
 		return fmt.Errorf("Can't insert device into stopped container")
 	}
 
 	// Create the device on the host
-	paths, err := c.createUnixDevice(name, m)
+	paths, err := c.createUnixDevice(prefix, m)
 	if err != nil {
 		return fmt.Errorf("Failed to setup device: %s", err)
 	}
@@ -5818,7 +6026,57 @@ func (c *containerLXC) insertUnixDeviceNum(name string, m types.Device, major in
 	return c.insertUnixDevice(name, temp)
 }
 
-func (c *containerLXC) removeUnixDevice(name string, m types.Device) error {
+func (c *containerLXC) removeUnixDeviceByPaths(prefix string, srcPath string, destPath string) error {
+	// Check that the container is running
+	pid := c.InitPID()
+	if pid == -1 {
+		return fmt.Errorf("Can't remove device from stopped container")
+	}
+
+	relativeHostDevPath := strings.TrimPrefix(srcPath, "/")
+	hostDevName := fmt.Sprintf("%s.%s", prefix, strings.Replace(relativeHostDevPath, "/", "-", -1))
+	hostDevPath := filepath.Join(c.DevicesPath(), hostDevName)
+
+	dType, dMajor, dMinor, err := deviceGetAttributes(hostDevPath)
+	if err != nil {
+		return err
+	}
+
+	if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+		// Remove the device cgroup rule
+		err = c.CGroupSet("devices.deny", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
+		if err != nil {
+			return err
+		}
+	}
+
+	relativeDestPath := strings.TrimPrefix(destPath, "/")
+	if c.FileExists(relativeDestPath) == nil {
+		err = c.removeMount(destPath)
+		if err != nil {
+			return fmt.Errorf("Error unmounting the device: %s", err)
+		}
+
+		err = c.FileRemove(relativeDestPath)
+		if err != nil {
+			return fmt.Errorf("Error removing the device: %s", err)
+		}
+	}
+
+	// Remove the host side
+	if c.state.OS.RunningInUserNS {
+		syscall.Unmount(hostDevPath, syscall.MNT_DETACH)
+	}
+
+	err = os.Remove(hostDevPath)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (c *containerLXC) removeUnixDevice(prefix string, m types.Device) error {
 	// Check that the container is running
 	pid := c.InitPID()
 	if pid == -1 {
@@ -5857,7 +6115,7 @@ func (c *containerLXC) removeUnixDevice(name string, m types.Device) error {
 		destPath = m["source"]
 	}
 	relativeDestPath := strings.TrimPrefix(destPath, "/")
-	devName := fmt.Sprintf("unix.%s.%s", name, strings.Replace(relativeDestPath, "/", "-", -1))
+	devName := fmt.Sprintf("%s.%s", prefix, strings.Replace(relativeDestPath, "/", "-", -1))
 	devPath := filepath.Join(c.DevicesPath(), devName)
 
 	if dType == "" || dMajor < 0 || dMinor < 0 {
@@ -5900,7 +6158,7 @@ func (c *containerLXC) removeUnixDevice(name string, m types.Device) error {
 	return nil
 }
 
-func (c *containerLXC) removeUnixDeviceNum(name string, m types.Device, major int, minor int, path string) error {
+func (c *containerLXC) removeUnixDeviceNum(prefix string, m types.Device, major int, minor int, path string) error {
 	pid := c.InitPID()
 	if pid == -1 {
 		return fmt.Errorf("Can't remove device from stopped container")
@@ -5915,7 +6173,7 @@ func (c *containerLXC) removeUnixDeviceNum(name string, m types.Device, major in
 	temp["minor"] = fmt.Sprintf("%d", minor)
 	temp["path"] = path
 
-	err := c.removeUnixDevice(name, temp)
+	err := c.removeUnixDevice(prefix, temp)
 	if err != nil {
 		logger.Error("failed to remove device", log.Ctx{"err": err, m["type"]: path, "container": c.Name()})
 		return err
@@ -5925,7 +6183,120 @@ func (c *containerLXC) removeUnixDeviceNum(name string, m types.Device, major in
 	return nil
 }
 
-func (c *containerLXC) removeUnixDevices() error {
+func (c *containerLXC) addInfinibandDevicesPerPort(deviceName string, ifDev *IBF, inject bool) error {
+	for _, unixCharDev := range ifDev.PerPortDevices {
+		destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
+		relativeDestPath := fmt.Sprintf("dev/infiniband/%s", unixCharDev)
+		deslashedRelativeDestPath := strings.Replace(relativeDestPath, "/", "-", -1)
+
+		uniquePhysDevPrefix := fmt.Sprintf("infiniband.unix.%s.%s", ifDev.Fun, unixCharDev)
+		uniqueMockDevPrefix := fmt.Sprintf("infiniband.unix.%s.%s.%s", deviceName, ifDev.Fun, unixCharDev)
+
+		uniquePhysDevName := fmt.Sprintf("%s.%s", uniquePhysDevPrefix, deslashedRelativeDestPath)
+		uniqueMockDevName := fmt.Sprintf("%s.%s", uniqueMockDevPrefix, deslashedRelativeDestPath)
+
+		hostPhysDevPath := filepath.Join(c.DevicesPath(), uniquePhysDevName)
+		hostMockDevPath := filepath.Join(c.DevicesPath(), uniqueMockDevName)
+
+		f, err := os.Create(hostMockDevPath)
+		if err != nil {
+			return err
+		}
+		f.Close()
+
+		// Device has already been created before so we only create a dummy file and continue.
+		if shared.PathExists(hostPhysDevPath) {
+			continue
+		}
+
+		// Unix device
+		dummyDevice := types.Device{
+			"source": destPath,
+		}
+
+		if inject {
+			err := c.insertUnixDevice(uniquePhysDevPrefix, dummyDevice)
+			if err != nil {
+				return err
+			}
+			continue
+		}
+
+		paths, err := c.createUnixDevice(uniquePhysDevPrefix, dummyDevice)
+		if err != nil {
+			return err
+		}
+		devPath := paths[0]
+
+		// inform liblxc about the mount
+		err = lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file", hostPhysDevPath, relativeDestPath))
+		if err != nil {
+			return err
+		}
+
+		if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+			// Add the new device cgroup rule
+			dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
+			if err != nil {
+				return err
+			}
+
+			err = lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
+			if err != nil {
+				return fmt.Errorf("Failed to add cgroup rule for device")
+			}
+		}
+	}
+
+	return nil
+}
+
+func (c *containerLXC) addInfinibandDevicesPerFun(deviceName string, ifDev *IBF, inject bool) error {
+	for _, unixCharDev := range ifDev.PerFunDevices {
+		destPath := fmt.Sprintf("/dev/infiniband/%s", unixCharDev)
+		uniqueDevPrefix := fmt.Sprintf("infiniband.unix.%s.%s", deviceName, unixCharDev)
+		relativeDestPath := fmt.Sprintf("dev/infiniband/%s", unixCharDev)
+		uniqueDevName := fmt.Sprintf("%s.%s", uniqueDevPrefix, strings.Replace(relativeDestPath, "/", "-", -1))
+		hostDevPath := filepath.Join(c.DevicesPath(), uniqueDevName)
+
+		dummyDevice := types.Device{
+			"source": destPath,
+		}
+
+		if inject {
+			err := c.insertUnixDevice(uniqueDevPrefix, dummyDevice)
+			if err != nil {
+				return err
+			}
+			continue
+		}
+
+		// inform liblxc about the mount
+		err := lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file", hostDevPath, relativeDestPath))
+		if err != nil {
+			return err
+		}
+
+		paths, err := c.createUnixDevice(uniqueDevPrefix, dummyDevice)
+		devPath := paths[0]
+		if c.IsPrivileged() && !c.state.OS.RunningInUserNS && c.state.OS.CGroupDevicesController {
+			// Add the new device cgroup rule
+			dType, dMajor, dMinor, err := deviceGetAttributes(devPath)
+			if err != nil {
+				return err
+			}
+
+			err = lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("%s %d:%d rwm", dType, dMajor, dMinor))
+			if err != nil {
+				return fmt.Errorf("Failed to add cgroup rule for device")
+			}
+		}
+	}
+
+	return nil
+}
+
+func (c *containerLXC) removeUnixDevices(prefix string) error {
 	// Check that we indeed have devices to remove
 	if !shared.PathExists(c.DevicesPath()) {
 		return nil
@@ -5940,7 +6311,7 @@ func (c *containerLXC) removeUnixDevices() error {
 	// Go through all the unix devices
 	for _, f := range dents {
 		// Skip non-Unix devices
-		if !strings.HasPrefix(f.Name(), "unix.") {
+		if !strings.HasPrefix(f.Name(), prefix) {
 			continue
 		}
 
@@ -6130,21 +6501,21 @@ func (c *containerLXC) fillSriovNetworkDevice(name string, m types.Device, reser
 			return nil, err
 		}
 
-		if len(ents) != 1 {
-			continue
-		}
+		for _, ent := range ents {
+			// another nic device entry called dibs
+			if shared.StringInSlice(ent.Name(), reserved) {
+				continue
+			}
 
-		// another nic device entry called dibs
-		if shared.StringInSlice(ents[0].Name(), reserved) {
-			continue
+			// found a free one
+			nicName = ent.Name()
+			break
 		}
 
-		// found a free one
-		nicName = ents[0].Name()
 		break
 	}
 
-	if nicName == "" {
+	if nicName == "" && m["type"] != "infiniband" {
 		if sriovNum == sriovTotal {
 			return nil, fmt.Errorf("All virtual functions of sriov device '%s' seem to be in use", m["parent"])
 		}
@@ -6274,7 +6645,7 @@ func (c *containerLXC) fillNetworkDevice(name string, m types.Device) (types.Dev
 	}
 
 	// Fill in the MAC address
-	if m["nictype"] != "physical" && m["hwaddr"] == "" {
+	if m["nictype"] != "physical" && m["hwaddr"] == "" && m["type"] != "infiniband" {
 		configKey := fmt.Sprintf("volatile.%s.hwaddr", name)
 		volatileHwaddr := c.localConfig[configKey]
 		if volatileHwaddr == "" {
@@ -6991,7 +7362,7 @@ func (c *containerLXC) getHostInterface(name string) string {
 
 	for _, k := range c.expandedDevices.DeviceNames() {
 		dev := c.expandedDevices[k]
-		if dev["type"] != "nic" {
+		if dev["type"] != "nic" && dev["type"] != "infiniband" {
 			continue
 		}
 
diff --git a/lxd/devices.go b/lxd/devices.go
index 241fd5104..abb68af31 100644
--- a/lxd/devices.go
+++ b/lxd/devices.go
@@ -795,13 +795,13 @@ func deviceUSBEvent(s *state.State, usb usbDevice) {
 			}
 
 			if usb.action == "add" {
-				err := c.insertUnixDeviceNum(name, m, usb.major, usb.minor, usb.path)
+				err := c.insertUnixDeviceNum(fmt.Sprintf("unix.%s", name), m, usb.major, usb.minor, usb.path)
 				if err != nil {
 					logger.Error("failed to create usb device", log.Ctx{"err": err, "usb": usb, "container": c.Name()})
 					return
 				}
 			} else if usb.action == "remove" {
-				err := c.removeUnixDeviceNum(name, m, usb.major, usb.minor, usb.path)
+				err := c.removeUnixDeviceNum(fmt.Sprintf("unix.%s", name), m, usb.major, usb.minor, usb.path)
 				if err != nil {
 					logger.Error("failed to remove usb device", log.Ctx{"err": err, "usb": usb, "container": c.Name()})
 					return
@@ -1422,7 +1422,7 @@ func deviceLoadInfiniband() (map[string]IBF, error) {
 		return nil, os.ErrNotExist
 	}
 
-	var UseableDevices map[string]IBF
+	UseableDevices := make(map[string]IBF)
 	for _, IBDevName := range IBDevNames {
 		IBDevResourceFile := fmt.Sprintf("/sys/class/infiniband/%s/device/resource", IBDevName)
 		IBDevResourceBuf, err := ioutil.ReadFile(IBDevResourceFile)
diff --git a/shared/util.go b/shared/util.go
index e84eaeb26..84d740b1d 100644
--- a/shared/util.go
+++ b/shared/util.go
@@ -502,6 +502,15 @@ func IsTrue(value string) bool {
 	return false
 }
 
+func IsRegularPath(pathName string) bool {
+	sb, err := os.Stat(pathName)
+	if err != nil {
+		return false
+	}
+
+	return sb.Mode().IsRegular()
+}
+
 func IsBlockdev(fm os.FileMode) bool {
 	return ((fm&os.ModeDevice != 0) && (fm&os.ModeCharDevice == 0))
 }

From eebfc335b2e266ae720e97e22140bd787ebb4ebb Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Fri, 1 Dec 2017 23:51:42 +0100
Subject: [PATCH 4/4] doc: add "infiniband" api extension

Closes #3983.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 doc/api-extensions.md |  3 +++
 doc/containers.md     | 27 +++++++++++++++++++++++++++
 shared/version/api.go |  1 +
 3 files changed, 31 insertions(+)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 70b1076c1..8b4f54b33 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -359,3 +359,6 @@ This adds support for SR-IOV enabled network devices.
 
 ## console
 This adds support to interact with the container console device and console log.
+
+## infiniband
+This adds support to use infiniband network devices.
diff --git a/doc/containers.md b/doc/containers.md
index 8d93f1f0a..cd2268db0 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -231,6 +231,33 @@ lxc config device add <container> <device-name> nic nictype=sriov parent=<sriov-
 To tell LXD to use a specific unused VF add the `host_name` property and pass
 it the name of the enabled VF.
 
+### Type: infiniband
+LXD supports two different kinds of network types for infiniband devices:
+
+ - `physical`: Straight physical device passthrough from the host. The targeted device will vanish from the host and appear in the container.
+ - `sriov`: Passes a virtual function of an SR-IOV enabled physical network device into the container.
+
+Different network interface types have different additional properties. The current list is:
+
+Key                     | Type      | Default           | Required  | Used by         | API extension | Description
+:--                     | :--       | :--               | :--       | :--             | :--           | :--
+nictype                 | string    | -                 | yes       | all             | infiniband    | The device type, one of "physical" or "sriov"
+name                    | string    | kernel assigned   | no        | all             | infiniband    | The name of the interface inside the container
+hwaddr                  | string    | randomly assigned | no        | all             | infiniband    | The MAC address of the new interface
+mtu                     | integer   | parent MTU        | no        | all             | infiniband    | The MTU of the new interface
+parent                  | string    | -                 | yes       | physical, sriov | infiniband    | The name of the host device
+
+To create a `sriov` `infiniband` device use:
+
+```
+lxc config device add <container> <device-name> infiniband nictype=sriov parent=<sriov-enabled-device>
+```
+
+To create a `physical` `infiniband` device use:
+
+```
+lxc config device add <container> <device-name> infiniband nictype=physical parent=<device>
+```
 
 ### Type: disk
 Disk entries are essentially mountpoints inside the container. They can
diff --git a/shared/version/api.go b/shared/version/api.go
index 13ddbb7b0..b318c253b 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -80,4 +80,5 @@ var APIExtensions = []string{
 	"macaroon_authentication",
 	"network_sriov",
 	"console",
+	"infiniband",
 }


More information about the lxc-devel mailing list