[lxc-devel] [lxd/master] VM: Adds SR-IOV NIC support
tomponline on Github
lxc-bot at linuxcontainers.org
Mon Jan 27 17:55:50 UTC 2020
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 346 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200127/b5b9a261/attachment-0001.bin>
-------------- next part --------------
From c04da5d4aac94bc859d76a8b355cb09a0d4f3603 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:20:06 +0000
Subject: [PATCH 01/15] lxd/container/lxc: Removes VM specific NIC config
ignoring
As no longer returned for containers.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/container_lxc.go | 4 ----
1 file changed, 4 deletions(-)
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 895ca69ee9..0a4497f47e 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2171,10 +2171,6 @@ func (c *containerLXC) startCommon() (string, []func() error, error) {
}
for _, nicItem := range runConf.NetworkInterface {
- if nicItem.Key == "devName" {
- // Skip internal device name key, not used by liblxc.
- continue
- }
err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.%s", networkKeyPrefix, nicID, nicItem.Key), nicItem.Value)
if err != nil {
return "", postStartHooks, errors.Wrapf(err, "Failed to setup device network interface '%s'", dev.Name)
From ac6520f5ad03e4a2a89309060bb8de8cf0eccf0d Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:20:56 +0000
Subject: [PATCH 02/15] lxd/device: Only return devName NIC config item for VMs
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/infiniband_physical.go | 8 +++++++-
lxd/device/infiniband_sriov.go | 1 -
lxd/device/nic_bridged.go | 7 ++++---
lxd/device/nic_macvlan.go | 7 ++++---
lxd/device/nic_p2p.go | 7 ++++---
lxd/device/nic_physical.go | 8 +++++++-
lxd/device/nic_sriov.go | 1 -
7 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/lxd/device/infiniband_physical.go b/lxd/device/infiniband_physical.go
index 6c6947b5bc..482f39d817 100644
--- a/lxd/device/infiniband_physical.go
+++ b/lxd/device/infiniband_physical.go
@@ -116,13 +116,19 @@ func (d *infinibandPhysical) Start() (*deviceConfig.RunConfig, error) {
}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
{Key: "link", Value: saveData["host_name"]},
}
+ if d.inst.Type() == instancetype.VM {
+ runConf.NetworkInterface = append(runConf.NetworkInterface,
+ []deviceConfig.RunConfigItem{
+ {Key: "devName", Value: d.name},
+ }...)
+ }
+
return &runConf, nil
}
diff --git a/lxd/device/infiniband_sriov.go b/lxd/device/infiniband_sriov.go
index cb673e5ee5..b6932e9753 100644
--- a/lxd/device/infiniband_sriov.go
+++ b/lxd/device/infiniband_sriov.go
@@ -138,7 +138,6 @@ func (d *infinibandSRIOV) Start() (*deviceConfig.RunConfig, error) {
}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
diff --git a/lxd/device/nic_bridged.go b/lxd/device/nic_bridged.go
index dd1e534799..7633dac620 100644
--- a/lxd/device/nic_bridged.go
+++ b/lxd/device/nic_bridged.go
@@ -176,7 +176,6 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
@@ -185,8 +184,10 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) {
if d.inst.Type() == instancetype.VM {
runConf.NetworkInterface = append(runConf.NetworkInterface,
- deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]},
- )
+ []deviceConfig.RunConfigItem{
+ {Key: "devName", Value: d.name},
+ {Key: "hwaddr", Value: d.config["hwaddr"]},
+ }...)
}
return &runConf, nil
diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go
index 43cb93a651..131f5d8bdc 100644
--- a/lxd/device/nic_macvlan.go
+++ b/lxd/device/nic_macvlan.go
@@ -134,7 +134,6 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
@@ -143,8 +142,10 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
if d.inst.Type() == instancetype.VM {
runConf.NetworkInterface = append(runConf.NetworkInterface,
- deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]},
- )
+ []deviceConfig.RunConfigItem{
+ {Key: "devName", Value: d.name},
+ {Key: "hwaddr", Value: d.config["hwaddr"]},
+ }...)
}
revert.Success()
diff --git a/lxd/device/nic_p2p.go b/lxd/device/nic_p2p.go
index 0bfa97462f..2043597110 100644
--- a/lxd/device/nic_p2p.go
+++ b/lxd/device/nic_p2p.go
@@ -97,7 +97,6 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
@@ -106,8 +105,10 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) {
if d.inst.Type() == instancetype.VM {
runConf.NetworkInterface = append(runConf.NetworkInterface,
- deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]},
- )
+ []deviceConfig.RunConfigItem{
+ {Key: "devName", Value: d.name},
+ {Key: "hwaddr", Value: d.config["hwaddr"]},
+ }...)
}
return &runConf, nil
diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index 54d98c6a0d..5a043db9e0 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -111,13 +111,19 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
{Key: "link", Value: saveData["host_name"]},
}
+ if d.inst.Type() == instancetype.VM {
+ runConf.NetworkInterface = append(runConf.NetworkInterface,
+ []deviceConfig.RunConfigItem{
+ {Key: "devName", Value: d.name},
+ }...)
+ }
+
return &runConf, nil
}
diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index b9fb4a8f30..d57c52604c 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -113,7 +113,6 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{}
runConf.NetworkInterface = []deviceConfig.RunConfigItem{
- {Key: "devName", Value: d.name},
{Key: "name", Value: d.config["name"]},
{Key: "type", Value: "phys"},
{Key: "flags", Value: "up"},
From 92208250966e5d8ce179799a190ec97010d8c1b6 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:50:22 +0000
Subject: [PATCH 03/15] lxd/device/nic/physical: Improves revert and deletion
of created VLAN devices
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_physical.go | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index 5a043db9e0..e8470de7ef 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -5,6 +5,7 @@ import (
deviceConfig "github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/lxd/instance/instancetype"
+ "github.com/lxc/lxd/lxd/revert"
"github.com/lxc/lxd/shared"
)
@@ -62,6 +63,9 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
saveData := make(map[string]string)
+ revert := revert.New()
+ defer revert.Fail()
+
// Record the host_name device used for restoration later.
saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"])
@@ -72,16 +76,15 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
// Record whether we created this device or not so it can be removed on stop.
saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing")
- // If we return from this function with an error, ensure we clean up created device.
- defer func() {
- if err != nil && statusDev == "created" {
- NetworkRemoveInterface(saveData["host_name"])
- }
- }()
+ if shared.IsTrue(saveData["last_state.created"]) {
+ revert.Add(func() {
+ NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"])
+ })
+ }
- // If we didn't create the device we should track various properties so we can
- // restore them when the instance is stopped or the device is detached.
- if statusDev == "existing" {
+ // If we didn't create the device we should track various properties so we can restore them when the
+ // instance is stopped or the device is detached.
+ if !shared.IsTrue(saveData["last_state.created"]) {
err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
if err != nil {
return nil, err
@@ -124,6 +127,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
}...)
}
+ revert.Success()
return &runConf, nil
}
@@ -151,9 +155,18 @@ func (d *nicPhysical) postStop() error {
v := d.volatileGet()
hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- err := networkRestorePhysicalNic(hostName, v)
- if err != nil {
- return err
+
+ // This will delete the parent interface if we created it for VLAN parent.
+ if shared.IsTrue(v["last_state.created"]) {
+ err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"])
+ if err != nil {
+ return err
+ }
+ } else {
+ err := networkRestorePhysicalNic(hostName, v)
+ if err != nil {
+ return err
+ }
}
return nil
From 07883a1918ce975b857b6c5ddc72efb59c8bb72d Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 23 Jan 2020 17:51:13 +0000
Subject: [PATCH 04/15] lxd/instance/drivers/driver/qemu/templates: Clarifies
qemuNetdevPhysical variables
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/instance/drivers/driver_qemu_templates.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lxd/instance/drivers/driver_qemu_templates.go b/lxd/instance/drivers/driver_qemu_templates.go
index af15de4f6a..a557dc127a 100644
--- a/lxd/instance/drivers/driver_qemu_templates.go
+++ b/lxd/instance/drivers/driver_qemu_templates.go
@@ -220,6 +220,6 @@ var qemuNetdevPhysical = template.Must(template.New("qemuNetdevPhysical").Parse(
# Network card ("{{.devName}}" device)
[device "dev-lxd_{{.devName}}"]
driver = "vfio-pci"
-host = "{{.host}}"
+host = "{{.pciSlotName}}"
bootindex = "{{.bootIndex}}"
`))
From 3a7188c004499cc087af9bd3939be215fb61e18a Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:30:12 +0000
Subject: [PATCH 05/15] lxd/device/nic/macvlan: Differentiates config parent
from actual parent
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_macvlan.go | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go
index 131f5d8bdc..65401ebcc9 100644
--- a/lxd/device/nic_macvlan.go
+++ b/lxd/device/nic_macvlan.go
@@ -67,13 +67,13 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
saveData := make(map[string]string)
// Decide which parent we should use based on VLAN setting.
- parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+ actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
// Record the temporary device name used for deletion later.
saveData["host_name"] = NetworkRandomDevName("mac")
// Create VLAN parent device if needed.
- statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], parentName, d.config["vlan"])
+ statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], actualParentName, d.config["vlan"])
if err != nil {
return nil, err
}
@@ -83,19 +83,19 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
if shared.IsTrue(saveData["last_state.created"]) {
revert.Add(func() {
- NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
+ NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
})
}
if d.inst.Type() == instancetype.Container {
// Create MACVLAN interface.
- _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvlan", "mode", "bridge")
+ _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", actualParentName, "type", "macvlan", "mode", "bridge")
if err != nil {
return nil, err
}
} else if d.inst.Type() == instancetype.VM {
// Create MACVTAP interface.
- _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvtap", "mode", "bridge")
+ _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", actualParentName, "type", "macvtap", "mode", "bridge")
if err != nil {
return nil, err
}
@@ -187,8 +187,8 @@ func (d *nicMACVLAN) postStop() error {
// This will delete the parent interface if we created it for VLAN parent.
if shared.IsTrue(v["last_state.created"]) {
- parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
+ actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+ err := NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
if err != nil {
errs = append(errs, err)
}
From 4f4d0837f2b1264b6957809e2afd17d128999406 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:31:09 +0000
Subject: [PATCH 06/15] lxd/device/device/utils/network: Adds
networkGetDevicePCIDevice function
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 44 ++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index b2fc04fbd0..a37b69aaef 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -783,3 +783,47 @@ func networkParsePortRange(r string) (int64, int64, error) {
return base, size, nil
}
+
+// pciDevice represents info about a PCI uevent device.
+type pciDevice struct {
+ ID string
+ SlotName string
+ Driver string
+}
+
+// networkGetDevicePCIDevice returns the PCI device info for a given uevent file.
+func networkGetDevicePCIDevice(ueventFilePath string) (pciDevice, error) {
+ dev := pciDevice{}
+
+ file, err := os.Open(ueventFilePath)
+ if err != nil {
+ return dev, err
+ }
+ defer file.Close()
+
+ scanner := bufio.NewScanner(file)
+ for scanner.Scan() {
+ // Looking for something like this "PCI_SLOT_NAME=0000:05:10.0"
+ fields := strings.SplitN(scanner.Text(), "=", 2)
+ if len(fields) == 2 {
+ if fields[0] == "PCI_SLOT_NAME" {
+ dev.SlotName = fields[1]
+ } else if fields[0] == "PCI_ID" {
+ dev.ID = fields[1]
+ } else if fields[0] == "DRIVER" {
+ dev.Driver = fields[1]
+ }
+ }
+ }
+
+ err = scanner.Err()
+ if err != nil {
+ return dev, err
+ }
+
+ if dev.SlotName == "" {
+ return dev, fmt.Errorf("Device uevent file could not be parsed")
+ }
+
+ return dev, nil
+}
From 5c97f7bd686ff02afd33d7862a9731e6b8d52eca Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:31:47 +0000
Subject: [PATCH 07/15] lxd/device/nic/sriov: Updates networkGetVFDevicePCISlot
to use networkGetDevicePCIDevice
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_sriov.go | 21 +++------------------
1 file changed, 3 insertions(+), 18 deletions(-)
diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index d57c52604c..03e2a63ccb 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -5,7 +5,6 @@ import (
"bytes"
"fmt"
"io/ioutil"
- "os"
"os/exec"
"path/filepath"
"regexp"
@@ -483,27 +482,13 @@ func (d *nicSRIOV) networkGetVirtFuncInfo(devName string, vfID int) (vf virtFunc
// networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device.
func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (string, error) {
- file, err := os.Open(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID))
+ ueventFile := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID)
+ pciDev, err := networkGetDevicePCIDevice(ueventFile)
if err != nil {
return "", err
}
- defer file.Close()
- scanner := bufio.NewScanner(file)
- for scanner.Scan() {
- // Looking for something like this "PCI_SLOT_NAME=0000:05:10.0"
- fields := strings.SplitN(scanner.Text(), "=", 2)
- if len(fields) == 2 && fields[0] == "PCI_SLOT_NAME" {
- return fields[1], nil
- }
- }
-
- err = scanner.Err()
- if err != nil {
- return "", err
- }
-
- return "", fmt.Errorf("PCI_SLOT_NAME not found")
+ return pciDev.SlotName, nil
}
// networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys.
From bc95615a83476260c594db61660ed686c7e5e038 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 09:36:00 +0000
Subject: [PATCH 08/15] lxd/instance/drivers/driver/qemu: Adds physical NIC
passthrough support
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/instance/drivers/driver_qemu.go | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index ef0f5dc99b..3495aed123 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -1457,7 +1457,7 @@ func (vm *qemu) addDriveConfig(sb *strings.Builder, bootIndexes map[string]int,
// addNetDevConfig adds the qemu config required for adding a network device.
func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes map[string]int, nicConfig []deviceConfig.RunConfigItem, fdFiles *[]string) error {
- var devName, nicName, devHwaddr string
+ var devName, nicName, devHwaddr, pciSlotName string
for _, nicItem := range nicConfig {
if nicItem.Key == "devName" {
devName = nicItem.Value
@@ -1465,6 +1465,8 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes m
nicName = nicItem.Value
} else if nicItem.Key == "hwaddr" {
devHwaddr = nicItem.Value
+ } else if nicItem.Key == "pciSlotName" {
+ pciSlotName = nicItem.Value
}
}
@@ -1499,6 +1501,10 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes m
// Detect TAP (via TUN driver) device.
tplFields["ifName"] = nicName
tpl = qemuNetDevTapTun
+ } else if pciSlotName != "" {
+ // Detect physical passthrough device.
+ tplFields["pciSlotName"] = pciSlotName
+ tpl = qemuNetdevPhysical
}
if tpl != nil {
From 8065e5061685cfbe10386f968faa13e33aa3be1a Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 12:01:48 +0000
Subject: [PATCH 09/15] shared/instance: Updates config key checker to allow
".driver" keys
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
shared/instance.go | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/shared/instance.go b/shared/instance.go
index 1264e991c7..7830c0e806 100644
--- a/shared/instance.go
+++ b/shared/instance.go
@@ -418,6 +418,10 @@ func ConfigKeyChecker(key string) (func(value string) error, error) {
if strings.HasSuffix(key, ".ceph_rbd") {
return IsAny, nil
}
+
+ if strings.HasSuffix(key, ".driver") {
+ return IsAny, nil
+ }
}
if strings.HasPrefix(key, "environment.") {
From 7efa6875cd943c7f1fcd4040611c926167d8a5de Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 27 Jan 2020 16:09:13 +0000
Subject: [PATCH 10/15] lxd/device/device/utils/network: Adds generic PCI
device bind/unbind functions
To be used with both physical VM NICs and sriov NICs.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 51 ++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index a37b69aaef..0d419a1588 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"sync"
+ "time"
"github.com/pkg/errors"
@@ -827,3 +828,53 @@ func networkGetDevicePCIDevice(ueventFilePath string) (pciDevice, error) {
return dev, nil
}
+
+// networkDeviceUnbind unbinds a network device from the OS using its PCI Slot Name and driver name.
+func networkDeviceUnbind(pciDev pciDevice) error {
+ driverUnbindPath := fmt.Sprintf("/sys/bus/pci/drivers/%s/unbind", pciDev.Driver)
+ err := ioutil.WriteFile(driverUnbindPath, []byte(pciDev.SlotName), 0600)
+ if err != nil {
+ return errors.Wrapf(err, "Failed unbinding device %q via %q", pciDev.SlotName, driverUnbindPath)
+ }
+
+ return nil
+}
+
+// networkDeviceBind binds a network device to the OS using its PCI Slot Name and driver name.
+func networkDeviceBind(pciDev pciDevice) error {
+ driverBindPath := fmt.Sprintf("/sys/bus/pci/drivers/%s/bind", pciDev.Driver)
+ err := ioutil.WriteFile(driverBindPath, []byte(pciDev.SlotName), 0600)
+ if err != nil {
+ return errors.Wrapf(err, "Failed binding device %q via %q", pciDev.SlotName, driverBindPath)
+ }
+
+ return nil
+}
+
+// networkDeviceBindWait waits for network device to appear after being bound to a driver.
+func networkDeviceBindWait(pciDev pciDevice) error {
+ devicePath := fmt.Sprintf("/sys/bus/pci/drivers/%s/%s", pciDev.Driver, pciDev.SlotName)
+
+ for i := 0; i < 10; i++ {
+ if shared.PathExists(devicePath) {
+ return nil
+ }
+
+ time.Sleep(50 * time.Millisecond)
+ }
+
+ return fmt.Errorf("Bind of device %q took too long", devicePath)
+}
+
+// networkInterfaceBindWait waits for network interface to appear after being bound to a driver.
+func networkInterfaceBindWait(ifName string) error {
+ for i := 0; i < 10; i++ {
+ if shared.PathExists(fmt.Sprintf("/sys/class/net/%s", ifName)) {
+ return nil
+ }
+
+ time.Sleep(50 * time.Millisecond)
+ }
+
+ return fmt.Errorf("Bind of interface %q took too long", ifName)
+}
From 829ce0264eef50f267fcba31848ccdd0f596d2d1 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 27 Jan 2020 17:07:15 +0000
Subject: [PATCH 11/15] lxd/device/device/utils/network: Adds
networkVFIOPCIRegister
Allows a PCI device to be registered with the vfio-pci driver.
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index 0d419a1588..289bb8efe3 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -878,3 +878,21 @@ func networkInterfaceBindWait(ifName string) error {
return fmt.Errorf("Bind of interface %q took too long", ifName)
}
+
+// networkVFIOPCIRegister registers the PCI device with the VFIO-PCI driver.
+// Should also bind the device to the vfio-pci driver if it is present. Requires that the vfio-pci module is loaded.
+func networkVFIOPCIRegister(pciDev pciDevice) error {
+ // vfio-pci module takes device IDs as "n n" but networkGetDevicePCIDevice returns them as "n:n".
+ devIDParts := strings.SplitN(pciDev.ID, ":", 2)
+ if len(devIDParts) < 2 {
+ return fmt.Errorf("Invalid device ID from %q", pciDev.ID)
+ }
+
+ vfioPCINewIDPath := "/sys/bus/pci/drivers/vfio-pci/new_id"
+ err := ioutil.WriteFile(vfioPCINewIDPath, []byte(fmt.Sprintf("%s %s", devIDParts[0], devIDParts[1])), 0600)
+ if err != nil {
+ return errors.Wrapf(err, "Failed registering PCI device ID %q to %q", pciDev.ID, vfioPCINewIDPath)
+ }
+
+ return nil
+}
From 3a610429a8f8d37f98162aabb0dd820844cac6db Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 27 Jan 2020 16:10:51 +0000
Subject: [PATCH 12/15] lxd/device/nic/sriov: Switches PCI device bind/unbind
to generic functions
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_sriov.go | 70 ++++++++---------------------------------
1 file changed, 13 insertions(+), 57 deletions(-)
diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index 03e2a63ccb..10a111ef6f 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -6,11 +6,9 @@ import (
"fmt"
"io/ioutil"
"os/exec"
- "path/filepath"
"regexp"
"strconv"
"strings"
- "time"
deviceConfig "github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/lxd/instance/instancetype"
@@ -336,27 +334,20 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri
}
// Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
- vfPCISlot, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
- if err != nil {
- return err
- }
-
- // Get the path to the VF device's driver now, as once it is unbound we won't be able to
- // determine its driver path in order to rebind it.
- vfDriverPath, err := d.networkGetVFDeviceDriverPath(volatile["last_state.vf.id"])
+ vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
if err != nil {
return err
}
// Unbind VF device from the host so that the settings will take effect when we rebind it.
- err = d.networkDeviceUnbind(vfPCISlot, vfDriverPath)
+ err = networkDeviceUnbind(vfPCIDev)
if err != nil {
return err
}
// However we return from this function, we must try to rebind the VF so its not orphaned.
// The OS won't let an already bound device be bound again so is safe to call twice.
- defer d.networkDeviceBind(vfPCISlot, vfDriverPath)
+ defer networkDeviceBind(vfPCIDev)
// Setup VF VLAN if specified.
if d.config["vlan"] != "" {
@@ -402,7 +393,7 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri
}
// Bind VF device onto the host so that the settings will take effect.
- err = d.networkDeviceBind(vfPCISlot, vfDriverPath)
+ err = networkDeviceBind(vfPCIDev)
if err != nil {
return err
}
@@ -411,7 +402,7 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri
// it will re-appear shortly after. Unfortunately the time between sending the bind event
// to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
// otherwise next steps of applying settings to interface will fail.
- err = d.networkDeviceBindWait(volatile["host_name"])
+ err = networkInterfaceBindWait(volatile["host_name"])
if err != nil {
return err
}
@@ -481,42 +472,14 @@ func (d *nicSRIOV) networkGetVirtFuncInfo(devName string, vfID int) (vf virtFunc
}
// networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device.
-func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (string, error) {
+func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (pciDevice, error) {
ueventFile := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID)
pciDev, err := networkGetDevicePCIDevice(ueventFile)
if err != nil {
- return "", err
+ return pciDev, err
}
- return pciDev.SlotName, nil
-}
-
-// networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys.
-func (d *nicSRIOV) networkGetVFDeviceDriverPath(vfID string) (string, error) {
- return filepath.EvalSymlinks(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/driver", d.config["parent"], vfID))
-}
-
-// networkDeviceUnbind unbinds a network device from the OS using its PCI Slot Name and driver path.
-func (d *nicSRIOV) networkDeviceUnbind(pciSlotName string, driverPath string) error {
- return ioutil.WriteFile(fmt.Sprintf("%s/unbind", driverPath), []byte(pciSlotName), 0600)
-}
-
-// networkDeviceUnbind binds a network device to the OS using its PCI Slot Name and driver path.
-func (d *nicSRIOV) networkDeviceBind(pciSlotName string, driverPath string) error {
- return ioutil.WriteFile(fmt.Sprintf("%s/bind", driverPath), []byte(pciSlotName), 0600)
-}
-
-// networkDeviceBindWait waits for network interface to appear after being binded.
-func (d *nicSRIOV) networkDeviceBindWait(devName string) error {
- for i := 0; i < 10; i++ {
- if shared.PathExists(fmt.Sprintf("/sys/class/net/%s", devName)) {
- return nil
- }
-
- time.Sleep(50 * time.Millisecond)
- }
-
- return fmt.Errorf("Bind of interface \"%s\" took too long", devName)
+ return pciDev, nil
}
// restoreSriovParent restores SR-IOV parent device settings when removed from an instance using the
@@ -528,27 +491,20 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error {
}
// Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
- vfPCISlot, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
- if err != nil {
- return err
- }
-
- // Get the path to the VF device's driver now, as once it is unbound we won't be able to
- // determine its driver path in order to rebind it.
- vfDriverPath, err := d.networkGetVFDeviceDriverPath(volatile["last_state.vf.id"])
+ vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
if err != nil {
return err
}
// Unbind VF device from the host so that the settings will take effect when we rebind it.
- err = d.networkDeviceUnbind(vfPCISlot, vfDriverPath)
+ err = networkDeviceUnbind(vfPCIDev)
if err != nil {
return err
}
// However we return from this function, we must try to rebind the VF so its not orphaned.
// The OS won't let an already bound device be bound again so is safe to call twice.
- defer d.networkDeviceBind(vfPCISlot, vfDriverPath)
+ defer networkDeviceBind(vfPCIDev)
// Reset VF VLAN if specified
if volatile["last_state.vf.vlan"] != "" {
@@ -581,7 +537,7 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error {
}
// Bind VF device onto the host so that the settings will take effect.
- err = d.networkDeviceBind(vfPCISlot, vfDriverPath)
+ err = networkDeviceBind(vfPCIDev)
if err != nil {
return err
}
@@ -590,7 +546,7 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error {
// and it will re-appear on the host. Unfortunately the time between sending the bind event
// to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
// otherwise next step of restoring MAC and MTU settings in restorePhysicalNic will fail.
- err = d.networkDeviceBindWait(volatile["host_name"])
+ err = networkInterfaceBindWait(volatile["host_name"])
if err != nil {
return err
}
From 5e3807da38b747ec3c67e40f33f54e36020dbe6a Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 24 Jan 2020 12:03:03 +0000
Subject: [PATCH 13/15] lxd/device/nic/physical: Adds VM PCI passthrough
support
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_physical.go | 162 ++++++++++++++++++++++++++++---------
1 file changed, 123 insertions(+), 39 deletions(-)
diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index e8470de7ef..25e36a7cc8 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -3,9 +3,12 @@ package device
import (
"fmt"
+ "github.com/pkg/errors"
+
deviceConfig "github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/lxd/instance/instancetype"
"github.com/lxc/lxd/lxd/revert"
+ "github.com/lxc/lxd/lxd/util"
"github.com/lxc/lxd/shared"
)
@@ -15,20 +18,22 @@ type nicPhysical struct {
// validateConfig checks the supplied config for correctness.
func (d *nicPhysical) validateConfig() error {
- if d.inst.Type() != instancetype.Container {
+ if d.inst.Type() != instancetype.Container && d.inst.Type() != instancetype.VM {
return ErrUnsupportedDevType
}
requiredFields := []string{"parent"}
optionalFields := []string{
"name",
- "mtu",
- "hwaddr",
- "vlan",
"maas.subnet.ipv4",
"maas.subnet.ipv6",
"boot.priority",
}
+
+ if d.inst.Type() == instancetype.Container {
+ optionalFields = append(optionalFields, "mtu", "hwaddr", "vlan")
+ }
+
err := d.config.Validate(nicValidationRules(requiredFields, optionalFields))
if err != nil {
return err
@@ -66,45 +71,97 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
revert := revert.New()
defer revert.Fail()
+ // pciSlotName, used for VM physical passthrough.
+ var pciSlotName string
+
+ // If VM, then try and load the vfio-pci module first.
+ if d.inst.Type() == instancetype.VM {
+ err = util.LoadModule("vfio-pci")
+ if err != nil {
+ return nil, errors.Wrapf(err, "Error loading %q module", "vfio-pci")
+ }
+ }
+
// Record the host_name device used for restoration later.
saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"])
- if err != nil {
- return nil, err
- }
- // Record whether we created this device or not so it can be removed on stop.
- saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing")
+ if d.inst.Type() == instancetype.Container {
+ statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"])
+ if err != nil {
+ return nil, err
+ }
- if shared.IsTrue(saveData["last_state.created"]) {
- revert.Add(func() {
- NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"])
- })
- }
+ // Record whether we created this device or not so it can be removed on stop.
+ saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing")
+
+ if shared.IsTrue(saveData["last_state.created"]) {
+ revert.Add(func() {
+ NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"])
+ })
+ }
+
+ // If we didn't create the device we should track various properties so we can restore them when the
+ // instance is stopped or the device is detached.
+ if !shared.IsTrue(saveData["last_state.created"]) {
+ err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ // Set the MAC address.
+ if d.config["hwaddr"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
+ if err != nil {
+ return nil, fmt.Errorf("Failed to set the MAC address: %s", err)
+ }
+ }
+
+ // Set the MTU.
+ if d.config["mtu"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
+ if err != nil {
+ return nil, fmt.Errorf("Failed to set the MTU: %s", err)
+ }
+ }
+ } else if d.inst.Type() == instancetype.VM {
+ // Get PCI information about the network interface.
+ ueventPath := fmt.Sprintf("/sys/class/net/%s/device/uevent", saveData["host_name"])
+ pciDev, err := networkGetDevicePCIDevice(ueventPath)
+ if err != nil {
+ return nil, errors.Wrapf(err, "Failed to get PCI device info for %q", saveData["host_name"])
+ }
+
+ saveData["last_state.pci.slot.name"] = pciDev.SlotName
+ saveData["last_state.pci.driver"] = pciDev.Driver
- // If we didn't create the device we should track various properties so we can restore them when the
- // instance is stopped or the device is detached.
- if !shared.IsTrue(saveData["last_state.created"]) {
- err = networkSnapshotPhysicalNic(saveData["host_name"], saveData)
+ // Unbind the interface from the host.
+ err = networkDeviceUnbind(pciDev)
if err != nil {
return nil, err
}
- }
- // Set the MAC address.
- if d.config["hwaddr"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
+ revert.Add(func() { networkDeviceBind(pciDev) })
+
+ // Register the device with the vfio-pci module.
+ err = networkVFIOPCIRegister(pciDev)
if err != nil {
- return nil, fmt.Errorf("Failed to set the MAC address: %s", err)
+ return nil, err
}
- }
- // Set the MTU.
- if d.config["mtu"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
+ vfioDev := pciDevice{
+ Driver: "vfio-pci",
+ SlotName: pciDev.SlotName,
+ }
+
+ revert.Add(func() { networkDeviceUnbind(vfioDev) })
+
+ err = networkDeviceBindWait(vfioDev)
if err != nil {
- return nil, fmt.Errorf("Failed to set the MTU: %s", err)
+ return nil, err
}
+
+ pciSlotName = saveData["last_state.pci.slot.name"]
}
err = d.volatileSet(saveData)
@@ -124,6 +181,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
runConf.NetworkInterface = append(runConf.NetworkInterface,
[]deviceConfig.RunConfigItem{
{Key: "devName", Value: d.name},
+ {Key: "pciSlotName", Value: pciSlotName},
}...)
}
@@ -147,26 +205,52 @@ func (d *nicPhysical) Stop() (*deviceConfig.RunConfig, error) {
// postStop is run after the device is removed from the instance.
func (d *nicPhysical) postStop() error {
defer d.volatileSet(map[string]string{
- "host_name": "",
- "last_state.hwaddr": "",
- "last_state.mtu": "",
- "last_state.created": "",
+ "host_name": "",
+ "last_state.hwaddr": "",
+ "last_state.mtu": "",
+ "last_state.created": "",
+ "last_state.pci.slot.name": "",
+ "last_state.pci.driver": "",
})
v := d.volatileGet()
- hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- // This will delete the parent interface if we created it for VLAN parent.
- if shared.IsTrue(v["last_state.created"]) {
- err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"])
+ // If VM physical pass through, unbind from vfio-pci and bind back to host driver.
+ if d.inst.Type() == instancetype.VM && v["last_state.pci.slot.name"] != "" {
+ vfioDev := pciDevice{
+ Driver: "vfio-pci",
+ SlotName: v["last_state.pci.slot.name"],
+ }
+
+ err := networkDeviceUnbind(vfioDev)
if err != nil {
return err
}
- } else {
- err := networkRestorePhysicalNic(hostName, v)
+
+ hostDev := pciDevice{
+ Driver: v["last_state.pci.driver"],
+ SlotName: v["last_state.pci.slot.name"],
+ }
+
+ err = networkDeviceBind(hostDev)
if err != nil {
return err
}
+ } else if d.inst.Type() == instancetype.Container {
+ hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
+
+ // This will delete the parent interface if we created it for VLAN parent.
+ if shared.IsTrue(v["last_state.created"]) {
+ err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"])
+ if err != nil {
+ return err
+ }
+ } else if v["last_state.pci.slot.name"] == "" {
+ err := networkRestorePhysicalNic(hostName, v)
+ if err != nil {
+ return err
+ }
+ }
}
return nil
From ba93cc69ec37eab0b73ab7316dca2b14d563a894 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 27 Jan 2020 16:20:03 +0000
Subject: [PATCH 14/15] lxd/device: Unexports NetworkRemoveInterfaceIfNeeded
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/device_utils_network.go | 4 ++--
lxd/device/nic_ipvlan.go | 2 +-
lxd/device/nic_macvlan.go | 4 ++--
lxd/device/nic_physical.go | 4 ++--
lxd/device/nic_routed.go | 2 +-
5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go
index 289bb8efe3..9abd4babc1 100644
--- a/lxd/device/device_utils_network.go
+++ b/lxd/device/device_utils_network.go
@@ -139,8 +139,8 @@ func NetworkRemoveInterface(nic string) error {
return err
}
-// NetworkRemoveInterfaceIfNeeded removes a network interface by name but only if no other instance is using it.
-func NetworkRemoveInterfaceIfNeeded(state *state.State, nic string, current instance.Instance, parent string, vlanID string) error {
+// networkRemoveInterfaceIfNeeded removes a network interface by name but only if no other instance is using it.
+func networkRemoveInterfaceIfNeeded(state *state.State, nic string, current instance.Instance, parent string, vlanID string) error {
// Check if it's used by another instance.
instances, err := InstanceLoadNodeAll(state)
if err != nil {
diff --git a/lxd/device/nic_ipvlan.go b/lxd/device/nic_ipvlan.go
index ee61a8353e..75c8227f8f 100644
--- a/lxd/device/nic_ipvlan.go
+++ b/lxd/device/nic_ipvlan.go
@@ -232,7 +232,7 @@ func (d *nicIPVLAN) postStop() error {
// This will delete the parent interface if we created it for VLAN parent.
if shared.IsTrue(v["last_state.created"]) {
parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
+ err := networkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
if err != nil {
return err
}
diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go
index 65401ebcc9..03d34a9e47 100644
--- a/lxd/device/nic_macvlan.go
+++ b/lxd/device/nic_macvlan.go
@@ -83,7 +83,7 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) {
if shared.IsTrue(saveData["last_state.created"]) {
revert.Add(func() {
- NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
+ networkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
})
}
@@ -188,7 +188,7 @@ func (d *nicMACVLAN) postStop() error {
// This will delete the parent interface if we created it for VLAN parent.
if shared.IsTrue(v["last_state.created"]) {
actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- err := NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
+ err := networkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"])
if err != nil {
errs = append(errs, err)
}
diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go
index 25e36a7cc8..b8d4662b72 100644
--- a/lxd/device/nic_physical.go
+++ b/lxd/device/nic_physical.go
@@ -96,7 +96,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) {
if shared.IsTrue(saveData["last_state.created"]) {
revert.Add(func() {
- NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"])
+ networkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"])
})
}
@@ -241,7 +241,7 @@ func (d *nicPhysical) postStop() error {
// This will delete the parent interface if we created it for VLAN parent.
if shared.IsTrue(v["last_state.created"]) {
- err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"])
+ err := networkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"])
if err != nil {
return err
}
diff --git a/lxd/device/nic_routed.go b/lxd/device/nic_routed.go
index 3f6d0cb66f..6077ce41bc 100644
--- a/lxd/device/nic_routed.go
+++ b/lxd/device/nic_routed.go
@@ -310,7 +310,7 @@ func (d *nicRouted) postStop() error {
// This will delete the parent interface if we created it for VLAN parent.
if shared.IsTrue(v["last_state.created"]) {
parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"])
- err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
+ err := networkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"])
if err != nil {
return err
}
From 47bd67bb1ff0a4207ac6abdb13505cca77a5aabb Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 27 Jan 2020 17:54:39 +0000
Subject: [PATCH 15/15] lxd/device/nic/sriov: Adds VM support
Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
lxd/device/nic_sriov.go | 181 ++++++++++++++++++++++++++++------------
1 file changed, 128 insertions(+), 53 deletions(-)
diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go
index 10a111ef6f..9269604381 100644
--- a/lxd/device/nic_sriov.go
+++ b/lxd/device/nic_sriov.go
@@ -12,6 +12,7 @@ import (
deviceConfig "github.com/lxc/lxd/lxd/device/config"
"github.com/lxc/lxd/lxd/instance/instancetype"
+ "github.com/lxc/lxd/lxd/revert"
"github.com/lxc/lxd/shared"
)
@@ -21,14 +22,13 @@ type nicSRIOV struct {
// validateConfig checks the supplied config for correctness.
func (d *nicSRIOV) validateConfig() error {
- if d.inst.Type() != instancetype.Container {
+ if d.inst.Type() != instancetype.Container && d.inst.Type() != instancetype.VM {
return ErrUnsupportedDevType
}
requiredFields := []string{"parent"}
optionalFields := []string{
"name",
- "mtu",
"hwaddr",
"vlan",
"security.mac_filtering",
@@ -36,6 +36,12 @@ func (d *nicSRIOV) validateConfig() error {
"maas.subnet.ipv6",
"boot.priority",
}
+
+ // For VMs only NIC properties that can be specified on the parent's VF settings are controllable.
+ if d.inst.Type() == instancetype.Container {
+ optionalFields = append(optionalFields, "mtu")
+ }
+
err := d.config.Validate(nicValidationRules(requiredFields, optionalFields))
if err != nil {
return err
@@ -76,31 +82,33 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) {
return nil, err
}
- err = d.setupSriovParent(vfDev, vfID, saveData)
+ vfPCIDev, err := d.setupSriovParent(vfDev, vfID, saveData)
if err != nil {
return nil, err
}
- // Set the MAC address.
- if d.config["hwaddr"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
- if err != nil {
- return nil, fmt.Errorf("Failed to set the MAC address: %s", err)
+ if d.inst.Type() == instancetype.Container {
+ // Set the MAC address.
+ if d.config["hwaddr"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"])
+ if err != nil {
+ return nil, fmt.Errorf("Failed to set the MAC address: %s", err)
+ }
}
- }
- // Set the MTU.
- if d.config["mtu"] != "" {
- _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
- if err != nil {
- return nil, fmt.Errorf("Failed to set the MTU: %s", err)
+ // Set the MTU.
+ if d.config["mtu"] != "" {
+ _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"])
+ if err != nil {
+ return nil, fmt.Errorf("Failed to set the MTU: %s", err)
+ }
}
- }
- // Bring the interface up.
- _, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "up")
- if err != nil {
- return nil, fmt.Errorf("Failed to bring up the interface: %v", err)
+ // Bring the interface up.
+ _, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "up")
+ if err != nil {
+ return nil, fmt.Errorf("Failed to bring up the interface: %v", err)
+ }
}
err = d.volatileSet(saveData)
@@ -116,6 +124,14 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) {
{Key: "link", Value: saveData["host_name"]},
}
+ if d.inst.Type() == instancetype.VM {
+ runConf.NetworkInterface = append(runConf.NetworkInterface,
+ []deviceConfig.RunConfigItem{
+ {Key: "devName", Value: d.name},
+ {Key: "pciSlotName", Value: vfPCIDev.SlotName},
+ }...)
+ }
+
return &runConf, nil
}
@@ -143,6 +159,7 @@ func (d *nicSRIOV) postStop() error {
"last_state.vf.hwaddr": "",
"last_state.vf.vlan": "",
"last_state.vf.spoofcheck": "",
+ "last_state.pci.driver": "",
})
v := d.volatileGet()
@@ -308,15 +325,20 @@ func (d *nicSRIOV) getFreeVFInterface(reservedDevices map[string]struct{}, vfLis
return "", nil
}
-// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original
-// properties of the physical device into voltatile for restoration on detach.
-func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[string]string) error {
+// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original properties of
+// the physical device into volatile for restoration on detach. Returns VF PCI device info.
+func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[string]string) (pciDevice, error) {
+ var vfPCIDev pciDevice
+
// Retrieve VF settings from parent device.
vfInfo, err := d.networkGetVirtFuncInfo(d.config["parent"], vfID)
if err != nil {
- return err
+ return vfPCIDev, err
}
+ revert := revert.New()
+ defer revert.Fail()
+
// Record properties of VF settings on the parent device.
volatile["last_state.vf.hwaddr"] = vfInfo.mac
volatile["last_state.vf.id"] = fmt.Sprintf("%d", vfID)
@@ -330,30 +352,28 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri
// Record properties of VF device.
err = networkSnapshotPhysicalNic(volatile["host_name"], volatile)
if err != nil {
- return err
+ return vfPCIDev, err
}
// Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
- vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
+ vfPCIDev, err = d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
if err != nil {
- return err
+ return vfPCIDev, err
}
// Unbind VF device from the host so that the settings will take effect when we rebind it.
err = networkDeviceUnbind(vfPCIDev)
if err != nil {
- return err
+ return vfPCIDev, err
}
- // However we return from this function, we must try to rebind the VF so its not orphaned.
- // The OS won't let an already bound device be bound again so is safe to call twice.
- defer networkDeviceBind(vfPCIDev)
+ revert.Add(func() { networkDeviceBind(vfPCIDev) })
// Setup VF VLAN if specified.
if d.config["vlan"] != "" {
_, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "vlan", d.config["vlan"])
if err != nil {
- return err
+ return vfPCIDev, err
}
}
@@ -370,44 +390,82 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri
// Set MAC on VF (this combined with spoof checking prevents any other MAC being used).
_, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", mac)
if err != nil {
- return err
+ return vfPCIDev, err
}
// Now that MAC is set on VF, we can enable spoof checking.
_, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", "on")
if err != nil {
- return err
+ return vfPCIDev, err
}
} else {
// Reset VF to ensure no previous MAC restriction exists.
_, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", "00:00:00:00:00:00")
if err != nil {
- return err
+ return vfPCIDev, err
}
// Ensure spoof checking is disabled if not enabled in instance.
_, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", "off")
if err != nil {
- return err
+ return vfPCIDev, err
}
- }
- // Bind VF device onto the host so that the settings will take effect.
- err = networkDeviceBind(vfPCIDev)
- if err != nil {
- return err
+ // Set MAC on VF if specified (this should be passed through into VM when it is bound to vfio-pci).
+ if d.inst.Type() == instancetype.VM {
+ // If no MAC specified in config, use current VF interface MAC.
+ mac := d.config["hwaddr"]
+ if mac == "" {
+ mac = volatile["last_state.hwaddr"]
+ }
+
+ _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", mac)
+ if err != nil {
+ return vfPCIDev, err
+ }
+ }
}
- // Wait for VF driver to be reloaded, this will remove the VF interface temporarily, and
- // it will re-appear shortly after. Unfortunately the time between sending the bind event
- // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
- // otherwise next steps of applying settings to interface will fail.
- err = networkInterfaceBindWait(volatile["host_name"])
- if err != nil {
- return err
+ if d.inst.Type() == instancetype.Container {
+ // Bind VF device onto the host so that the settings will take effect.
+ err = networkDeviceBind(vfPCIDev)
+ if err != nil {
+ return vfPCIDev, err
+ }
+
+ // Wait for VF driver to be reloaded, this will remove the VF interface temporarily, and
+ // it will re-appear shortly after. Unfortunately the time between sending the bind event
+ // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait,
+ // otherwise next steps of applying settings to interface will fail.
+ err = networkInterfaceBindWait(volatile["host_name"])
+ if err != nil {
+ return vfPCIDev, err
+ }
+ } else if d.inst.Type() == instancetype.VM {
+ // Register VF device with vfio-pci driver so it can be passed to VM.
+ err = networkVFIOPCIRegister(vfPCIDev)
+ if err != nil {
+ return vfPCIDev, err
+ }
+
+ vfioDev := pciDevice{
+ Driver: "vfio-pci",
+ SlotName: vfPCIDev.SlotName,
+ }
+
+ revert.Add(func() { networkDeviceUnbind(vfioDev) })
+
+ err = networkDeviceBindWait(vfioDev)
+ if err != nil {
+ return vfPCIDev, err
+ }
+
+ // Record original driver used by VF device for restore.
+ volatile["last_state.pci.driver"] = vfPCIDev.Driver
}
- return nil
+ revert.Success()
+ return vfPCIDev, nil
}
// virtFuncInfo holds information about SR-IOV virtual functions.
@@ -490,16 +548,33 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error {
return nil
}
- // Get VF device's PCI Slot Name so we can unbind and rebind it from the host.
+ // Get VF device's PCI info so we can unbind and rebind it from the host.
vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"])
if err != nil {
return err
}
- // Unbind VF device from the host so that the settings will take effect when we rebind it.
- err = networkDeviceUnbind(vfPCIDev)
- if err != nil {
- return err
+ if d.inst.Type() == instancetype.Container {
+ // Unbind VF device from the host so that the settings will take effect when we rebind it.
+ err = networkDeviceUnbind(vfPCIDev)
+ if err != nil {
+ return err
+ }
+ } else if d.inst.Type() == instancetype.VM {
+ // Unbind VF device from vfio-pci driver so that we can rebind it on host.
+ vfioDev := pciDevice{
+ Driver: "vfio-pci",
+ SlotName: vfPCIDev.SlotName,
+ }
+
+ err := networkDeviceUnbind(vfioDev)
+ if err != nil {
+ return err
+ }
+
+ // Before we bind the device back to the host, ensure we restore the original driver info as it
+ // should be currently set to vfio-pci.
+ vfPCIDev.Driver = volatile["last_state.pci.driver"]
}
// However we return from this function, we must try to rebind the VF so its not orphaned.
More information about the lxc-devel
mailing list