[lxc-devel] [lxd/master] VM: Physical PCI passthrough

tomponline on Github lxc-bot at linuxcontainers.org
Fri Jan 24 12:04:05 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 321 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200124/7b17cdf4/attachment.bin>
-------------- next part --------------
From 705419634434c56122217b97f6e92de0774550ff Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:20:06 +0000
Subject: [PATCH 01/10] lxd/container/lxc: Removes VM specific NIC config
 ignoring

As no longer returned for containers.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/container_lxc.go | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 895ca69ee9..0a4497f47e 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2171,10 +2171,6 @@ func (c *containerLXC) startCommon() (string, []func() error, error) {
 			}
 
 			for _, nicItem := range runConf.NetworkInterface {
-				if nicItem.Key == "devName" {
-					// Skip internal device name key, not used by liblxc.
-					continue
-				}
 				err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.%s", networkKeyPrefix, nicID, nicItem.Key), nicItem.Value)
 				if err != nil {
 					return "", postStartHooks, errors.Wrapf(err, "Failed to setup device network interface '%s'", dev.Name)

From 6e41efee218aa389af788874d86b90ff27e730b0 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:20:56 +0000
Subject: [PATCH 02/10] lxd/device: Only return devName NIC config item for VMs

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/infiniband_physical.go | 8 +++++++-
 lxd/device/infiniband_sriov.go    | 1 -
 lxd/device/nic_bridged.go         | 7 ++++---
 lxd/device/nic_macvlan.go         | 7 ++++---
 lxd/device/nic_p2p.go             | 7 ++++---
 lxd/device/nic_physical.go        | 8 +++++++-
 lxd/device/nic_sriov.go           | 1 -
 7 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/lxd/device/infiniband_physical.go b/lxd/device/infiniband_physical.go
index 6c6947b5bc..482f39d817 100644
--- a/lxd/device/infiniband_physical.go
+++ b/lxd/device/infiniband_physical.go
@@ -116,13 +116,19 @@ func (d *infinibandPhysical) Start() (*deviceConfig.RunConfig, error) {
 	}
 
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},
 		{Key: "link", Value: saveData["host_name"]},
 	}
 
+	if d.inst.Type() == instancetype.VM {
+		runConf.NetworkInterface = append(runConf.NetworkInterface,
+			[]deviceConfig.RunConfigItem{
+				{Key: "devName", Value: d.name},
+			}...)
+	}
+
 	return &runConf, nil
 }
 
diff --git a/lxd/device/infiniband_sriov.go b/lxd/device/infiniband_sriov.go
index cb673e5ee5..b6932e9753 100644
--- a/lxd/device/infiniband_sriov.go
+++ b/lxd/device/infiniband_sriov.go
@@ -138,7 +138,6 @@ func (d *infinibandSRIOV) Start() (*deviceConfig.RunConfig, error) {
 	}
 
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},
diff --git a/lxd/device/nic_bridged.go b/lxd/device/nic_bridged.go
index dd1e534799..7633dac620 100644
--- a/lxd/device/nic_bridged.go
+++ b/lxd/device/nic_bridged.go
@@ -176,7 +176,6 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) {
 
 	runConf := deviceConfig.RunConfig{}
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},
@@ -185,8 +184,10 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) {
 
 	if d.inst.Type() == instancetype.VM {
 		runConf.NetworkInterface = append(runConf.NetworkInterface,
-			deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]},
-		)
+			[]deviceConfig.RunConfigItem{
+				{Key: "devName", Value: d.name},
+				{Key: "hwaddr", Value: d.config["hwaddr"]},
+			}...)
 	}
 
 	return &runConf, nil
diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go
index 43cb93a651..131f5d8bdc 100644
--- a/lxd/device/nic_macvlan.go
+++ b/lxd/device/nic_macvlan.go
@@ -134,7 +134,6 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
 
 	runConf := deviceConfig.RunConfig{}
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},
@@ -143,8 +142,10 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
 
 	if d.inst.Type() == instancetype.VM {
 		runConf.NetworkInterface = append(runConf.NetworkInterface,
-			deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]},
-		)
+			[]deviceConfig.RunConfigItem{
+				{Key: "devName", Value: d.name},
+				{Key: "hwaddr", Value: d.config["hwaddr"]},
+			}...)
 	}
 
 	revert.Success()
diff --git a/lxd/device/nic_p2p.go b/lxd/device/nic_p2p.go
index 0bfa97462f..2043597110 100644
--- a/lxd/device/nic_p2p.go
+++ b/lxd/device/nic_p2p.go
@@ -97,7 +97,6 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) {
 
 	runConf := deviceConfig.RunConfig{}
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},
@@ -106,8 +105,10 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) {
 
 	if d.inst.Type() == instancetype.VM {
 		runConf.NetworkInterface = append(runConf.NetworkInterface,
-			deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]},
-		)
+			[]deviceConfig.RunConfigItem{
+				{Key: "devName", Value: d.name},
+				{Key: "hwaddr", Value: d.config["hwaddr"]},
+			}...)
 	}
 
 	return &runConf, nil
diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index 54d98c6a0d..5a043db9e0 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -111,13 +111,19 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 
 	runConf := deviceConfig.RunConfig{}
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},
 		{Key: "link", Value: saveData["host_name"]},
 	}
 
+	if d.inst.Type() == instancetype.VM {
+		runConf.NetworkInterface = append(runConf.NetworkInterface,
+			[]deviceConfig.RunConfigItem{
+				{Key: "devName", Value: d.name},
+			}...)
+	}
+
 	return &runConf, nil
 }
 
diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index b9fb4a8f30..d57c52604c 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -113,7 +113,6 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) {
 
 	runConf := deviceConfig.RunConfig{}
 	runConf.NetworkInterface = []deviceConfig.RunConfigItem{
-		{Key: "devName", Value: d.name},
 		{Key: "name", Value: d.config["name"]},
 		{Key: "type", Value: "phys"},
 		{Key: "flags", Value: "up"},

From c0549eab26ac264954506c333ecd6111a31b63d0 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:50:22 +0000
Subject: [PATCH 03/10] lxd/device/nic/physical: Improves revert and deletion
 of created VLAN devices

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/nic_physical.go | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index 5a043db9e0..e8470de7ef 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -5,6 +5,7 @@ import (
 
 	deviceConfig "github.com/lxc/lxd/lxd/device/config"
 	"github.com/lxc/lxd/lxd/instance/instancetype"
+	"github.com/lxc/lxd/lxd/revert"
 	"github.com/lxc/lxd/shared"
 )
 
@@ -62,6 +63,9 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 
 	saveData := make(map[string]string)
 
+	revert := revert.New()
+	defer revert.Fail()
+
 	// Record the host_name device used for restoration later.
 	saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
 	statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"])
@@ -72,16 +76,15 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 	// Record whether we created this device or not so it can be removed on stop.
 	saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing")
 
-	// If we return from this function with an error, ensure we clean up created device.
-	defer func() {
-		if err != nil && statusDev == "created" {
-			NetworkRemoveInterface(saveData["host_name"])
-		}
-	}()
+	if shared.IsTrue(saveData["last_state.created"]) {
+		revert.Add(func() {
+			NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"])
+		})
+	}
 
-	// If we didn't create the device we should track various properties so we can
-	// restore them when the instance is stopped or the device is detached.
-	if statusDev == "existing" {
+	// If we didn't create the device we should track various properties so we can restore them when the
+	// instance is stopped or the device is detached.
+	if !shared.IsTrue(saveData["last_state.created"]) {
 		err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
 		if err != nil {
 			return nil, err
@@ -124,6 +127,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 			}...)
 	}
 
+	revert.Success()
 	return &runConf, nil
 }
 
@@ -151,9 +155,18 @@ func (d *nicPhysical) postStop() error {
 
 	v := d.volatileGet()
 	hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
-	err := networkRestorePhysicalNic(hostName, v)
-	if err != nil {
-		return err
+
+	// This will delete the parent interface if we created it for VLAN parent.
+	if shared.IsTrue(v["last_state.created"]) {
+		err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"])
+		if err != nil {
+			return err
+		}
+	} else {
+		err := networkRestorePhysicalNic(hostName, v)
+		if err != nil {
+			return err
+		}
 	}
 
 	return nil

From 526a1e97c5b4085aa3c8eecdf7cad955c4e8d06f Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:51:13 +0000
Subject: [PATCH 04/10] lxd/instance/drivers/driver/qemu/templates: Clarifies
 qemuNetdevPhysical variables

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu_templates.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lxd/instance/drivers/driver_qemu_templates.go b/lxd/instance/drivers/driver_qemu_templates.go
index af15de4f6a..a557dc127a 100644
--- a/lxd/instance/drivers/driver_qemu_templates.go
+++ b/lxd/instance/drivers/driver_qemu_templates.go
@@ -220,6 +220,6 @@ var qemuNetdevPhysical = template.Must(template.New("qemuNetdevPhysical").Parse(
 # Network card ("{{.devName}}" device)
 [device "dev-lxd_{{.devName}}"]
 driver = "vfio-pci"
-host = "{{.host}}"
+host = "{{.pciSlotName}}"
 bootindex = "{{.bootIndex}}"
 `))

From 9080516bfe3a3d8e60c872d2d4184dc95b141aca Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:30:12 +0000
Subject: [PATCH 05/10] lxd/device/nic/macvlan: Differentiates config parent
 from actual parent

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/nic_macvlan.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go
index 131f5d8bdc..65401ebcc9 100644
--- a/lxd/device/nic_macvlan.go
+++ b/lxd/device/nic_macvlan.go
@@ -67,13 +67,13 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
 	saveData := make(map[string]string)
 
 	// Decide which parent we should use based on VLAN setting.
-	parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+	actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
 
 	// Record the temporary device name used for deletion later.
 	saveData["host_name"] = NetworkRandomDevName("mac")
 
 	// Create VLAN parent device if needed.
-	statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], parentName, d.config["vlan"])
+	statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], actualParentName, d.config["vlan"])
 	if err != nil {
 		return nil, err
 	}
@@ -83,19 +83,19 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
 
 	if shared.IsTrue(saveData["last_state.created"]) {
 		revert.Add(func() {
-			NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
+			NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
 		})
 	}
 
 	if d.inst.Type() == instancetype.Container {
 		// Create MACVLAN interface.
-		_, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvlan", "mode", "bridge")
+		_, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", actualParentName, "type", "macvlan", "mode", "bridge")
 		if err != nil {
 			return nil, err
 		}
 	} else if d.inst.Type() == instancetype.VM {
 		// Create MACVTAP interface.
-		_, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvtap", "mode", "bridge")
+		_, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", actualParentName, "type", "macvtap", "mode", "bridge")
 		if err != nil {
 			return nil, err
 		}
@@ -187,8 +187,8 @@ func (d *nicMACVLAN) postStop() error {
 
 	// This will delete the parent interface if we created it for VLAN parent.
 	if shared.IsTrue(v["last_state.created"]) {
-		parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
-		err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
+		actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+		err := NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
 		if err != nil {
 			errs = append(errs, err)
 		}

From c5c7f99170c7b992dcf3a0be0040d1ab0b4d69cf Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:31:09 +0000
Subject: [PATCH 06/10] lxd/device/device/utils/network: Adds
 networkGetDevicePCIDevice function

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/device_utils_network.go | 44 ++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index b2fc04fbd0..a37b69aaef 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -783,3 +783,47 @@ func networkParsePortRange(r string) (int64, int64, error) {
 
 	return base, size, nil
 }
+
+// pciDevice represents info about a PCI uevent device.
+type pciDevice struct {
+	ID       string
+	SlotName string
+	Driver   string
+}
+
+// networkGetDevicePCIDevice returns the PCI device info for a given uevent file.
+func networkGetDevicePCIDevice(ueventFilePath string) (pciDevice, error) {
+	dev := pciDevice{}
+
+	file, err := os.Open(ueventFilePath)
+	if err != nil {
+		return dev, err
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		// Looking for "KEY=value" lines such as "PCI_SLOT_NAME=0000:05:10.0" (PCI_ID and DRIVER are read the same way).
+		fields := strings.SplitN(scanner.Text(), "=", 2)
+		if len(fields) == 2 {
+			if fields[0] == "PCI_SLOT_NAME" {
+				dev.SlotName = fields[1]
+			} else if fields[0] == "PCI_ID" {
+				dev.ID = fields[1]
+			} else if fields[0] == "DRIVER" {
+				dev.Driver = fields[1]
+			}
+		}
+	}
+
+	err = scanner.Err()
+	if err != nil {
+		return dev, err
+	}
+
+	if dev.SlotName == "" {
+		return dev, fmt.Errorf("Device uevent file could not be parsed")
+	}
+
+	return dev, nil
+}

From 59e8189740924d70a8440b61137d417fec594368 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:31:47 +0000
Subject: [PATCH 07/10] lxd/device/nic/sriov: Updates networkGetVFDevicePCISlot
 to use networkGetDevicePCIDevice

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/nic_sriov.go | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index d57c52604c..03e2a63ccb 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -5,7 +5,6 @@ import (
 	"bytes"
 	"fmt"
 	"io/ioutil"
-	"os"
 	"os/exec"
 	"path/filepath"
 	"regexp"
@@ -483,27 +482,13 @@ func (d *nicSRIOV) networkGetVirtFuncInfo(devName string, vfID int) (vf virtFunc
 
 // networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device.
 func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (string, error) {
-	file, err := os.Open(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID))
+	ueventFile := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID)
+	pciDev, err := networkGetDevicePCIDevice(ueventFile)
 	if err != nil {
 		return "", err
 	}
-	defer file.Close()
 
-	scanner := bufio.NewScanner(file)
-	for scanner.Scan() {
-		// Looking for something like this "PCI_SLOT_NAME=0000:05:10.0"
-		fields := strings.SplitN(scanner.Text(), "=", 2)
-		if len(fields) == 2 && fields[0] == "PCI_SLOT_NAME" {
-			return fields[1], nil
-		}
-	}
-
-	err = scanner.Err()
-	if err != nil {
-		return "", err
-	}
-
-	return "", fmt.Errorf("PCI_SLOT_NAME not found")
+	return pciDev.SlotName, nil
 }
 
 // networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys.

From e9779d85ec6d61dc957962f730f3732a3ba048b9 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:36:00 +0000
Subject: [PATCH 08/10] lxd/instance/drivers/driver/qemu: Adds physical NIC
 passthrough support

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index dd4196d9dc..971c57bce8 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -1425,7 +1425,7 @@ func (vm *qemu) addDriveConfig(sb *strings.Builder, bootIndexes map[string]int,
 
 // addNetDevConfig adds the qemu config required for adding a network device.
 func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes map[string]int, nicConfig []deviceConfig.RunConfigItem, fdFiles *[]string) error {
-	var devName, nicName, devHwaddr string
+	var devName, nicName, devHwaddr, pciSlotName string
 	for _, nicItem := range nicConfig {
 		if nicItem.Key == "devName" {
 			devName = nicItem.Value
@@ -1433,6 +1433,8 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes m
 			nicName = nicItem.Value
 		} else if nicItem.Key == "hwaddr" {
 			devHwaddr = nicItem.Value
+		} else if nicItem.Key == "pciSlotName" {
+			pciSlotName = nicItem.Value
 		}
 	}
 
@@ -1467,6 +1469,10 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes m
 		// Detect TAP (via TUN driver) device.
 		tplFields["ifName"] = nicName
 		tpl = qemuNetDevTapTun
+	} else if pciSlotName != "" {
+		// Detect physical passthrough device.
+		tplFields["pciSlotName"] = pciSlotName
+		tpl = qemuNetdevPhysical
 	}
 
 	if tpl != nil {

From b0ee3b86a755d116f1126c0d69f8305e8d012852 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 12:01:48 +0000
Subject: [PATCH 09/10] shared/instance: Updates config key checker to allow
 ".driver" keys

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 shared/instance.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/shared/instance.go b/shared/instance.go
index 1264e991c7..7830c0e806 100644
--- a/shared/instance.go
+++ b/shared/instance.go
@@ -418,6 +418,10 @@ func ConfigKeyChecker(key string) (func(value string) error, error) {
 		if strings.HasSuffix(key, ".ceph_rbd") {
 			return IsAny, nil
 		}
+
+		if strings.HasSuffix(key, ".driver") {
+			return IsAny, nil
+		}
 	}
 
 	if strings.HasPrefix(key, "environment.") {

From b60d70d2d1216fb34c79c97d02812c71b049ad60 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 12:03:03 +0000
Subject: [PATCH 10/10] lxd/device/nic/physical: Adds VM PCI passthrough
 support

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/nic_physical.go | 95 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 5 deletions(-)

diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index e8470de7ef..feb94525bb 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -2,10 +2,16 @@ package device
 
 import (
 	"fmt"
+	"io/ioutil"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
 
 	deviceConfig "github.com/lxc/lxd/lxd/device/config"
 	"github.com/lxc/lxd/lxd/instance/instancetype"
 	"github.com/lxc/lxd/lxd/revert"
+	"github.com/lxc/lxd/lxd/util"
 	"github.com/lxc/lxd/shared"
 )
 
@@ -15,7 +21,7 @@ type nicPhysical struct {
 
 // validateConfig checks the supplied config for correctness.
 func (d *nicPhysical) validateConfig() error {
-	if d.inst.Type() != instancetype.Container {
+	if d.inst.Type() != instancetype.Container && d.inst.Type() != instancetype.VM {
 		return ErrUnsupportedDevType
 	}
 
@@ -66,6 +72,14 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 	revert := revert.New()
 	defer revert.Fail()
 
+	// If VM, then try and load the vfio-pci module first.
+	if d.inst.Type() == instancetype.VM {
+		err = util.LoadModule("vfio-pci")
+		if err != nil {
+			return nil, errors.Wrapf(err, "Error loading %q module", "vfio-pci")
+		}
+	}
+
 	// Record the host_name device used for restoration later.
 	saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
 	statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"])
@@ -107,6 +121,58 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 		}
 	}
 
+	// If VM, then prepare device for passthrough into VM.
+	var pciSlotName string
+	if d.inst.Type() == instancetype.VM {
+		// Get PCI information about the network interface.
+		ueventPath := fmt.Sprintf("/sys/class/net/%s/device/uevent", saveData["host_name"])
+		pciDev, err := networkGetDevicePCIDevice(ueventPath)
+		if err != nil {
+			return nil, errors.Wrapf(err, "Failed to get PCI device info for %q", saveData["host_name"])
+		}
+
+		saveData["last_state.pci.slot.name"] = pciDev.SlotName
+		saveData["last_state.pci.driver"] = pciDev.Driver
+
+		// vfio-pci module takes device IDs as "n n" but uevent returns them as "n:n".
+		devIDParts := strings.SplitN(pciDev.ID, ":", 2)
+		if len(devIDParts) < 2 {
+			return nil, fmt.Errorf("Invalid device ID from %q", ueventPath)
+		}
+
+		// Unbind the interface from the host.
+		unbindPath := fmt.Sprintf("/sys/class/net/%s/device/driver/unbind", saveData["host_name"])
+		err = ioutil.WriteFile(unbindPath, []byte(pciDev.SlotName), 0600)
+		if err != nil {
+			return nil, errors.Wrapf(err, "Failed unbinding PCI device %q from host via %q", pciDev.SlotName, unbindPath)
+		}
+
+		// Register the device with the vfio-pci module.
+		vfioPCINewIDPath := "/sys/bus/pci/drivers/vfio-pci/new_id"
+		err = ioutil.WriteFile(vfioPCINewIDPath, []byte(fmt.Sprintf("%s %s", devIDParts[0], devIDParts[1])), 0600)
+		if err != nil {
+			return nil, errors.Wrapf(err, "Failed adding PCI device ID %q to %q", pciDev.ID, vfioPCINewIDPath)
+		}
+
+		waitForBind := func() error {
+			for i := 0; i < 10; i++ {
+				if shared.PathExists(fmt.Sprintf("/sys/bus/pci/drivers/vfio-pci/%s", pciDev.SlotName)) {
+					return nil
+				}
+
+				time.Sleep(50 * time.Millisecond)
+			}
+			return fmt.Errorf("Bind of device %q took too long", pciDev.SlotName)
+		}
+
+		err = waitForBind()
+		if err != nil {
+			return nil, err
+		}
+
+		pciSlotName = pciDev.SlotName
+	}
+
 	err = d.volatileSet(saveData)
 	if err != nil {
 		return nil, err
@@ -124,6 +190,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
 		runConf.NetworkInterface = append(runConf.NetworkInterface,
 			[]deviceConfig.RunConfigItem{
 				{Key: "devName", Value: d.name},
+				{Key: "pciSlotName", Value: pciSlotName},
 			}...)
 	}
 
@@ -147,13 +214,31 @@ func (d *nicPhysical) Stop() (*deviceConfig.RunConfig, error) {
 // postStop is run after the device is removed from the instance.
 func (d *nicPhysical) postStop() error {
 	defer d.volatileSet(map[string]string{
-		"host_name":          "",
-		"last_state.hwaddr":  "",
-		"last_state.mtu":     "",
-		"last_state.created": "",
+		"host_name":                "",
+		"last_state.hwaddr":        "",
+		"last_state.mtu":           "",
+		"last_state.created":       "",
+		"last_state.pci.slot.name": "",
+		"last_state.pci.driver":    "",
 	})
 
 	v := d.volatileGet()
+
+	if v["last_state.pci.slot.name"] != "" && v["last_state.pci.driver"] != "" {
+		vfioPCIUnbindPath := "/sys/bus/pci/drivers/vfio-pci/unbind"
+		driverBindPath := fmt.Sprintf("/sys/bus/pci/drivers/%s/bind", v["last_state.pci.driver"])
+
+		err := ioutil.WriteFile(vfioPCIUnbindPath, []byte(v["last_state.pci.slot.name"]), 0600)
+		if err != nil {
+			return errors.Wrapf(err, "Failed unbinding PCI device %q from %q", v["last_state.pci.slot.name"], vfioPCIUnbindPath)
+		}
+
+		err = ioutil.WriteFile(driverBindPath, []byte(v["last_state.pci.slot.name"]), 0600)
+		if err != nil {
+			return errors.Wrapf(err, "Failed binding PCI device %q to %q", v["last_state.pci.slot.name"], driverBindPath)
+		}
+	}
+
 	hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
 
 	// This will delete the parent interface if we created it for VLAN parent.


More information about the lxc-devel mailing list