[lxc-devel] [lxd/master] VM: Adds GPU passthrough support

tomponline on Github lxc-bot at linuxcontainers.org
Fri Jun 12 11:01:59 UTC 2020


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 1568 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20200612/363d1b0f/attachment.bin>
-------------- next part --------------
From d74e2b3510496399a4db64efbcdd1bbc3c0eea00 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 8 Jun 2020 15:55:53 +0100
Subject: [PATCH 1/9] doc/instances: Updates GPU device docs to show VM support

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 doc/instances.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/instances.md b/doc/instances.md
index 8e8f80ff36..6663111ee9 100644
--- a/doc/instances.md
+++ b/doc/instances.md
@@ -692,7 +692,7 @@ required    | boolean   | false             | no        | Whether or not this de
 
 ### Type: gpu
 
-Supported instance types: container
+Supported instance types: container, VM
 
 GPU device entries simply make the requested gpu device appear in the
 instance.
@@ -705,9 +705,9 @@ vendorid    | string    | -                 | no        | The vendor id of the G
 productid   | string    | -                 | no        | The product id of the GPU device
 id          | string    | -                 | no        | The card id of the GPU device
 pci         | string    | -                 | no        | The pci address of the GPU device
-uid         | int       | 0                 | no        | UID of the device owner in the instance
-gid         | int       | 0                 | no        | GID of the device owner in the instance
-mode        | int       | 0660              | no        | Mode of the device in the instance
+uid         | int       | 0                 | no        | UID of the device owner in the instance (container only)
+gid         | int       | 0                 | no        | GID of the device owner in the instance (container only)
+mode        | int       | 0660              | no        | Mode of the device in the instance (container only)
 
 ### Type: proxy
 

From 189b4752842ad3f56e4125f6ee939d64599156ea Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Mon, 8 Jun 2020 16:09:32 +0100
Subject: [PATCH 2/9] lxd/device/gpu: Updates validation for VM support

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/gpu.go | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/lxd/device/gpu.go b/lxd/device/gpu.go
index 3c111b9cac..b09a6a51f7 100644
--- a/lxd/device/gpu.go
+++ b/lxd/device/gpu.go
@@ -33,7 +33,7 @@ type gpu struct {
 
 // validateConfig checks the supplied config for correctness.
 func (d *gpu) validateConfig(instConf instance.ConfigReader) error {
-	if !instanceSupported(instConf.Type(), instancetype.Container) {
+	if !instanceSupported(instConf.Type(), instancetype.Container, instancetype.VM) {
 		return ErrUnsupportedDevType
 	}
 
@@ -52,12 +52,28 @@ func (d *gpu) validateConfig(instConf instance.ConfigReader) error {
 		return err
 	}
 
-	if d.config["pci"] != "" && (d.config["id"] != "" || d.config["productid"] != "" || d.config["vendorid"] != "") {
-		return fmt.Errorf("Cannot use id, productid or vendorid when pci is set")
+	if d.config["pci"] != "" {
+		for _, field := range []string{"id", "productid", "vendorid"} {
+			if d.config[field] != "" {
+				return fmt.Errorf(`Cannot use %q when "pci" is set`, field)
+			}
+		}
+	}
+
+	if d.config["id"] != "" {
+		for _, field := range []string{"pci", "productid", "vendorid"} {
+			if d.config[field] != "" {
+				return fmt.Errorf(`Cannot use %q when "id" is set`, field)
+			}
+		}
 	}
 
-	if d.config["id"] != "" && (d.config["pci"] != "" || d.config["productid"] != "" || d.config["vendorid"] != "") {
-		return fmt.Errorf("Cannot use pci, productid or vendorid when id is set")
+	if instConf.Type() == instancetype.VM { // uid, gid and mode are documented as container only.
+		for _, field := range []string{"uid", "gid", "mode"} {
+			if d.config[field] != "" {
+				return fmt.Errorf("Cannot use %q when instance type is VM", field)
+			}
+		}
 	}
 
 	return nil

From cc6f5c855ca82b2357f8b2ea37541951ad9d4e6c Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 11 Jun 2020 13:29:27 +0100
Subject: [PATCH 3/9] lxd/device/config/device/runconfig: Adds GPU field to
 RunConfig

For passing through GPU device config settings to Qemu instances.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/config/device_runconfig.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lxd/device/config/device_runconfig.go b/lxd/device/config/device_runconfig.go
index 35b24e3cbf..eba388047a 100644
--- a/lxd/device/config/device_runconfig.go
+++ b/lxd/device/config/device_runconfig.go
@@ -41,4 +41,5 @@ type RunConfig struct {
 	Mounts           []MountEntryItem // Mounts to setup/remove.
 	Uevents          [][]string       // Uevents to inject.
 	PostHooks        []func() error   // Functions to be run after device attach/detach.
+	GPUDevice        []RunConfigItem  // GPU device configuration settings.
 }

From b1f04d86324234fb8027dca8664727b1c6a1edd4 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 12 Jun 2020 09:47:04 +0100
Subject: [PATCH 4/9] lxd/device/device/utils/generic: pciDeviceDriverOverride
 only check for driver binding if specified

Allows for clearing driver override and rebinding when original driver is unknown (or device was not originally bound to a driver).

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/device_utils_generic.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lxd/device/device_utils_generic.go b/lxd/device/device_utils_generic.go
index a959973c6e..819353e0b6 100644
--- a/lxd/device/device_utils_generic.go
+++ b/lxd/device/device_utils_generic.go
@@ -150,10 +150,12 @@ func pciDeviceDriverOverride(pciDev pciDevice, driverOverride string) error {
 		SlotName: pciDev.SlotName,
 	}
 
-	// Wait for the device to be bound to the overridden driver.
-	err = pciDeviceProbeWait(vfioDev)
-	if err != nil {
-		return err
+	// Wait for the device to be bound to the overridden driver if specified.
+	if vfioDev.Driver != "" {
+		err = pciDeviceProbeWait(vfioDev)
+		if err != nil {
+			return err
+		}
 	}
 
 	revert.Success()

From 1fa434540d0739e610bfa4bbb6cdf2d8c086be04 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Thu, 11 Jun 2020 13:43:04 +0100
Subject: [PATCH 5/9] lxd/device/gpu: Adds VM GPU passthrough support

Unbinds specified device and associated IOMMU group VFs and rebinds them to vfio-pci.

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/device/gpu.go | 170 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 160 insertions(+), 10 deletions(-)

diff --git a/lxd/device/gpu.go b/lxd/device/gpu.go
index b09a6a51f7..200415a75d 100644
--- a/lxd/device/gpu.go
+++ b/lxd/device/gpu.go
@@ -9,6 +9,7 @@ import (
 	"strconv"
 	"strings"
 
+	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 
 	deviceConfig "github.com/lxc/lxd/lxd/device/config"
@@ -95,6 +96,16 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
 		return nil, err
 	}
 
+	if d.inst.Type() == instancetype.VM {
+		return d.startVM()
+	}
+
+	return d.startContainer()
+}
+
+// startContainer detects the requested GPU devices and sets up unix-char devices.
+// Returns RunConfig populated with mount info required to pass the unix-char devices into the container.
+func (d *gpu) startContainer() (*deviceConfig.RunConfig, error) {
 	runConf := deviceConfig.RunConfig{}
 	gpus, err := resources.GetGPU()
 	if err != nil {
@@ -103,14 +114,16 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
 
 	sawNvidia := false
 	found := false
+
 	for _, gpu := range gpus.Cards {
+		// Skip any cards that don't match the vendorid, pci or productid settings (if specified).
 		if (d.config["vendorid"] != "" && gpu.VendorID != d.config["vendorid"]) ||
 			(d.config["pci"] != "" && gpu.PCIAddress != d.config["pci"]) ||
 			(d.config["productid"] != "" && gpu.ProductID != d.config["productid"]) {
 			continue
 		}
 
-		// Handle DRM devices if present and matches criteria.
+		// Setup DRM unix-char devices if present and matches id criteria (or if id not specified).
 		if gpu.DRM != nil && (d.config["id"] == "" || fmt.Sprintf("%d", gpu.DRM.ID) == d.config["id"]) {
 			found = true
 
@@ -170,9 +183,9 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
 		}
 	}
 
+	// Setup additional unix-char devices for nvidia cards.
+	// No need to mount additional nvidia non-card devices as the nvidia.runtime setting will do this for us.
 	if sawNvidia {
-		// No need to mount additional nvidia non-card devices as the nvidia.runtime
-		// setting will do this for us.
 		instanceConfig := d.inst.ExpandedConfig()
 		if !shared.IsTrue(instanceConfig["nvidia.runtime"]) {
 			nvidiaDevices, err := d.getNvidiaNonCardDevices()
@@ -201,15 +214,130 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
 	return &runConf, nil
 }
 
+// startVM detects the requested GPU devices and related virtual functions and rebinds them to the vfio-pci driver.
+func (d *gpu) startVM() (*deviceConfig.RunConfig, error) {
+	runConf := deviceConfig.RunConfig{}
+	gpus, err := resources.GetGPU()
+	if err != nil {
+		return nil, err
+	}
+
+	saveData := make(map[string]string)
+	var pciAddress string
+
+	for _, gpu := range gpus.Cards {
+		// Skip any cards that don't match the vendorid, pci, productid or DRM ID settings (if specified).
+		if (d.config["vendorid"] != "" && gpu.VendorID != d.config["vendorid"]) ||
+			(d.config["pci"] != "" && gpu.PCIAddress != d.config["pci"]) ||
+			(d.config["productid"] != "" && gpu.ProductID != d.config["productid"]) ||
+			(d.config["id"] != "" && (gpu.DRM == nil || fmt.Sprintf("%d", gpu.DRM.ID) != d.config["id"])) {
+			continue
+		}
+
+		if pciAddress != "" {
+			return nil, fmt.Errorf("VMs cannot match multiple GPUs per device")
+		}
+
+		pciAddress = gpu.PCIAddress
+	}
+
+	if pciAddress == "" {
+		return nil, fmt.Errorf("Failed to detect requested GPU device")
+	}
+
+	// Get PCI information about the GPU device.
+	devicePath := filepath.Join("/sys/bus/pci/devices", pciAddress)
+	pciDev, err := pciParseUeventFile(filepath.Join(devicePath, "uevent"))
+	if err != nil {
+		return nil, errors.Wrapf(err, "Failed to get PCI device info for GPU %q", pciAddress)
+	}
+
+	saveData["last_state.pci.slot.name"] = pciDev.SlotName
+	saveData["last_state.pci.driver"] = pciDev.Driver
+
+	err = d.pciDeviceDriverOverrideIOMMU(pciDev, "vfio-pci", false)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Failed to override IOMMU group driver")
+	}
+
+	runConf.GPUDevice = append(runConf.GPUDevice,
+		[]deviceConfig.RunConfigItem{
+			{Key: "devName", Value: d.name},
+			{Key: "pciSlotName", Value: saveData["last_state.pci.slot.name"]},
+		}...)
+
+	err = d.volatileSet(saveData)
+	if err != nil {
+		return nil, err
+	}
+
+	return &runConf, nil
+}
+
+// pciDeviceDriverOverrideIOMMU overrides all functions in the specified device's IOMMU group (if exists) that
+// are functions of the device. If IOMMU group doesn't exist, only the device itself is overridden.
+// If restore argument is true, then IOMMU VF devices related to the main device have their driver override cleared
+// rather than being set to the driverOverride specified. This ensures that IOMMU VFs that were using a different
+// driver (or no driver) when being overridden are not restored back to the main device's driver.
+func (d *gpu) pciDeviceDriverOverrideIOMMU(pciDev pciDevice, driverOverride string, restore bool) error {
+	iommuGroupPath := filepath.Join("/sys/bus/pci/devices", pciDev.SlotName, "iommu_group", "devices")
+
+	if shared.PathExists(iommuGroupPath) {
+		// Extract parent slot name by removing any virtual function ID.
+		parts := strings.SplitN(pciDev.SlotName, ".", 2)
+		prefix := parts[0]
+
+		// Iterate the members of the IOMMU group and override any that match the parent slot name prefix.
+		err := filepath.Walk(iommuGroupPath, func(path string, _ os.FileInfo, err error) error {
+			if err != nil {
+				return err
+			}
+
+			iommuSlotName := filepath.Base(path) // Virtual function's address is dir name.
+			if strings.HasPrefix(iommuSlotName, prefix) {
+				iommuPciDev := pciDevice{
+					Driver:   pciDev.Driver,
+					SlotName: iommuSlotName,
+				}
+
+				if iommuSlotName != pciDev.SlotName && restore {
+					// We don't know the original driver for VFs, so just remove override.
+					err = pciDeviceDriverOverride(iommuPciDev, "")
+				} else {
+					err = pciDeviceDriverOverride(iommuPciDev, driverOverride)
+				}
+
+				if err != nil {
+					return err
+				}
+			}
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+	} else {
+		err := pciDeviceDriverOverride(pciDev, driverOverride)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
 // Stop is run when the device is removed from the instance.
 func (d *gpu) Stop() (*deviceConfig.RunConfig, error) {
 	runConf := deviceConfig.RunConfig{
 		PostHooks: []func() error{d.postStop},
 	}
 
-	err := unixDeviceRemove(d.inst.DevicesPath(), "unix", d.name, "", &runConf)
-	if err != nil {
-		return nil, err
+	if d.inst.Type() == instancetype.Container {
+		err := unixDeviceRemove(d.inst.DevicesPath(), "unix", d.name, "", &runConf)
+		if err != nil {
+			return nil, err
+		}
 	}
 
 	return &runConf, nil
@@ -217,10 +345,32 @@ func (d *gpu) Stop() (*deviceConfig.RunConfig, error) {
 
 // postStop is run after the device is removed from the instance.
 func (d *gpu) postStop() error {
-	// Remove host files for this device.
-	err := unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), "unix", d.name, "")
-	if err != nil {
-		return fmt.Errorf("Failed to delete files for device '%s': %v", d.name, err)
+	defer d.volatileSet(map[string]string{
+		"last_state.pci.slot.name": "",
+		"last_state.pci.driver":    "",
+	})
+
+	v := d.volatileGet()
+
+	if d.inst.Type() == instancetype.Container {
+		// Remove host files for this device.
+		err := unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), "unix", d.name, "")
+		if err != nil {
+			return fmt.Errorf("Failed to delete files for device '%s': %v", d.name, err)
+		}
+	}
+
+	// If VM physical pass through, unbind from vfio-pci and bind back to host driver.
+	if d.inst.Type() == instancetype.VM && v["last_state.pci.slot.name"] != "" {
+		pciDev := pciDevice{
+			Driver:   "vfio-pci",
+			SlotName: v["last_state.pci.slot.name"],
+		}
+
+		err := d.pciDeviceDriverOverrideIOMMU(pciDev, v["last_state.pci.driver"], true)
+		if err != nil {
+			return err
+		}
 	}
 
 	return nil

From 33d9f1f180fbb9e26338cfd43045f390a4291815 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 12 Jun 2020 10:09:57 +0100
Subject: [PATCH 6/9] lxd/instance/drivers/driver/qemu/templates: Consistent
 naming and casing for net dev templates

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu_templates.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lxd/instance/drivers/driver_qemu_templates.go b/lxd/instance/drivers/driver_qemu_templates.go
index f4a6c3f1ca..9f4c3ae179 100644
--- a/lxd/instance/drivers/driver_qemu_templates.go
+++ b/lxd/instance/drivers/driver_qemu_templates.go
@@ -357,8 +357,8 @@ multifunction = "on"
 {{- end }}
 `))
 
-// qemuDevTapCommon is common PCI device template for tap based netdevs.
-var qemuDevTapCommon = template.Must(template.New("qemuDevTapCommon").Parse(`
+// qemuNetDevTapCommon is common PCI device template for tap based netdevs.
+var qemuNetDevTapCommon = template.Must(template.New("qemuNetDevTapCommon").Parse(`
 [device "dev-lxd_{{.devName}}"]
 {{- if eq .bus "pci" "pcie"}}
 driver = "virtio-net-pci"
@@ -377,7 +377,7 @@ multifunction = "on"
 `))
 
 // Devices use "lxd_" prefix indicating that this is a user named device.
-var qemuNetDevTapTun = template.Must(qemuDevTapCommon.New("qemuNetDevTapTun").Parse(`
+var qemuNetDevTapTun = template.Must(qemuNetDevTapCommon.New("qemuNetDevTapTun").Parse(`
 # Network card ("{{.devName}}" device)
 [netdev "lxd_{{.devName}}"]
 type = "tap"
@@ -385,21 +385,21 @@ vhost = "on"
 ifname = "{{.ifName}}"
 script = "no"
 downscript = "no"
-{{ template "qemuDevTapCommon" . -}}
+{{ template "qemuNetDevTapCommon" . -}}
 `))
 
 // Devices use "lxd_" prefix indicating that this is a user named device.
-var qemuNetdevTapFD = template.Must(qemuDevTapCommon.New("qemuNetdevTapFD").Parse(`
+var qemuNetDevTapFD = template.Must(qemuNetDevTapCommon.New("qemuNetDevTapFD").Parse(`
 # Network card ("{{.devName}}" device)
 [netdev "lxd_{{.devName}}"]
 type = "tap"
 vhost = "on"
 fd = "{{.tapFD}}"
-{{ template "qemuDevTapCommon" . -}}
+{{ template "qemuNetDevTapCommon" . -}}
 `))
 
 // Devices use "lxd_" prefix indicating that this is a user named device.
-var qemuNetdevPhysical = template.Must(template.New("qemuNetdevPhysical").Parse(`
+var qemuNetDevPhysical = template.Must(template.New("qemuNetDevPhysical").Parse(`
 # Network card ("{{.devName}}" device)
 [device "dev-lxd_{{.devName}}"]
 {{- if eq .bus "pci" "pcie"}}

From 53d5f528f26ef5012308bf6832defafbfa9c2dbc Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 12 Jun 2020 10:11:19 +0100
Subject: [PATCH 7/9] lxd/instance/drivers/driver/qemu: Consistent net dev
 naming usage

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index 7c12346a98..a7e9070af0 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -2005,7 +2005,7 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, bus *qemuBus, bootIndexes m
 
 		// Append the tap device file path to the list of files to be opened and passed to qemu.
 		tplFields["tapFD"] = vm.addFileDescriptor(fdFiles, fmt.Sprintf("/dev/tap%d", ifindex))
-		tpl = qemuNetdevTapFD
+		tpl = qemuNetDevTapFD
 	} else if shared.PathExists(fmt.Sprintf("/sys/class/net/%s/tun_flags", nicName)) {
 		// Detect TAP (via TUN driver) device.
 		tplFields["ifName"] = nicName
@@ -2013,7 +2013,7 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, bus *qemuBus, bootIndexes m
 	} else if pciSlotName != "" {
 		// Detect physical passthrough device.
 		tplFields["pciSlotName"] = pciSlotName
-		tpl = qemuNetdevPhysical
+		tpl = qemuNetDevPhysical
 	}
 
 	devBus, devAddr, multi := bus.allocate("")

From 873535e62cdece4f82bbe0f5cfa517904d91c9d4 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 12 Jun 2020 11:22:05 +0100
Subject: [PATCH 8/9] lxd/instance/drivers/driver/qemu/templates: Adds
 qemuGPUDevPhysical template

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu_templates.go | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/lxd/instance/drivers/driver_qemu_templates.go b/lxd/instance/drivers/driver_qemu_templates.go
index 9f4c3ae179..52b9632003 100644
--- a/lxd/instance/drivers/driver_qemu_templates.go
+++ b/lxd/instance/drivers/driver_qemu_templates.go
@@ -416,3 +416,24 @@ bootindex = "{{.bootIndex}}"
 multifunction = "on"
 {{- end }}
 `))
+
+// Devices use "lxd_" prefix indicating that this is a user named device.
+var qemuGPUDevPhysical = template.Must(template.New("qemuGPUDevPhysical").Parse(`
+# GPU card ("{{.devName}}" device)
+[device "dev-lxd_{{.devName}}"]
+{{- if eq .bus "pci" "pcie"}}
+driver = "vfio-pci"
+bus = "{{.devBus}}"
+addr = "{{.devAddr}}"
+{{- end}}
+{{if eq .bus "ccw" -}}
+driver = "vfio-ccw"
+{{- end}}
+host = "{{.pciSlotName}}"
+{{if .vga -}}
+x-vga = "on"
+{{- end }}
+{{if .multifunction -}}
+multifunction = "on"
+{{- end }}
+`))

From 15e3eaf43f0717248d2c50d9ab2bebcd499db9bd Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parrott at canonical.com>
Date: Fri, 12 Jun 2020 11:22:23 +0100
Subject: [PATCH 9/9] lxd/instance/drivers/driver/qemu: Adds GPU passthrough
 support

Signed-off-by: Thomas Parrott <thomas.parrott at canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 85 +++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index a7e9070af0..58b8f0f978 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -1728,6 +1728,14 @@ func (vm *qemu) generateQemuConfigFile(busName string, devConfs []*deviceConfig.
 				return "", err
 			}
 		}
+
+		// Add GPU device.
+		if len(runConf.GPUDevice) > 0 {
+			err = vm.addGPUDevConfig(sb, bus, runConf.GPUDevice)
+			if err != nil {
+				return "", err
+			}
+		}
 	}
 
 	// Write the agent mount config.
@@ -2027,6 +2035,83 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, bus *qemuBus, bootIndexes m
 	return fmt.Errorf("Unrecognised device type")
 }
 
+// addGPUDevConfig adds the qemu config required for adding a GPU device.
+func (vm *qemu) addGPUDevConfig(sb *strings.Builder, bus *qemuBus, gpuConfig []deviceConfig.RunConfigItem) error {
+	var devName, pciSlotName string
+	for _, gpuItem := range gpuConfig {
+		if gpuItem.Key == "devName" {
+			devName = gpuItem.Value
+		} else if gpuItem.Key == "pciSlotName" {
+			pciSlotName = gpuItem.Value
+		}
+	}
+
+	devBus, devAddr, multi := bus.allocate(fmt.Sprintf("lxd_%s", devName))
+	tplFields := map[string]interface{}{
+		"bus":           bus.name,
+		"devBus":        devBus,
+		"devAddr":       devAddr,
+		"multifunction": multi,
+
+		"devName":     devName,
+		"pciSlotName": pciSlotName,
+		"vga":         true,
+	}
+
+	// Add main GPU device in VGA mode to qemu config.
+	err := qemuGPUDevPhysical.Execute(sb, tplFields)
+	if err != nil {
+		return err
+	}
+
+	// Add any other related IOMMU VFs as generic PCI devices.
+	iommuGroupPath := filepath.Join("/sys/bus/pci/devices", pciSlotName, "iommu_group", "devices")
+
+	if shared.PathExists(iommuGroupPath) {
+		// Extract parent slot name by removing any virtual function ID.
+		parts := strings.SplitN(pciSlotName, ".", 2)
+		prefix := parts[0]
+
+		// Iterate the members of the IOMMU group and add to the qemu config any that match the parent slot name prefix.
+		err := filepath.Walk(iommuGroupPath, func(path string, _ os.FileInfo, err error) error {
+			if err != nil {
+				return err
+			}
+
+			iommuSlotName := filepath.Base(path) // Virtual function's address is dir name.
+
+			// Match any VFs that are related to the GPU device (but not the GPU device itself).
+			if strings.HasPrefix(iommuSlotName, prefix) && iommuSlotName != pciSlotName {
+				// Add VF device without VGA mode to qemu config.
+				devBus, devAddr, multi := bus.allocate(fmt.Sprintf("lxd_%s", devName))
+				tplFields := map[string]interface{}{
+					"bus":           bus.name,
+					"devBus":        devBus,
+					"devAddr":       devAddr,
+					"multifunction": multi,
+
+					// Generate associated device name by combining main device name and allocated device address.
+					"devName":     fmt.Sprintf("%s_%s", devName, devAddr),
+					"pciSlotName": iommuSlotName,
+					"vga":         false,
+				}
+
+				err := qemuGPUDevPhysical.Execute(sb, tplFields)
+				if err != nil {
+					return err
+				}
+			}
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
 // pidFilePath returns the path where the qemu process should write its PID.
 func (vm *qemu) pidFilePath() string {
 	return filepath.Join(vm.LogPath(), "qemu.pid")


More information about the lxc-devel mailing list