[lxc-devel] [lxd/master] gpu: handle cards among Nvidia devices

brauner on Github lxc-bot at linuxcontainers.org
Tue Jun 26 12:24:15 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 381 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180626/b5cc4d81/attachment.bin>
-------------- next part --------------
From 8a0e9a4004c82dbc21f6616e0ec7b053611acb98 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Tue, 26 Jun 2018 14:23:16 +0200
Subject: [PATCH] gpu: handle cards among Nvidia devices

Closes #4683.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/container_lxc.go | 25 ++++++++++++++++++++++---
 lxd/devices.go       | 14 ++++++++------
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 111f0e853..4284e3a55 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2120,8 +2120,13 @@ func (c *containerLXC) startCommon() (string, error) {
 				sawNvidia = true
 			}
 
-			if sawNvidia && !shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+			if sawNvidia {
 				for _, gpu := range nvidiaDevices {
+					if shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+						if !gpu.isCard {
+							continue
+						}
+					}
 					err := c.setupUnixDevice(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path, true, false)
 					if err != nil {
 						return "", err
@@ -4376,11 +4381,18 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 					}
 				}
 
-				if !nvidiaExists && !shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+				if !nvidiaExists {
 					for _, gpu := range nvidiaDevices {
+						if shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+							if !gpu.isCard {
+								continue
+							}
+						}
+
 						if !c.deviceExistsInDevicesFolder(fmt.Sprintf("unix.%s", k), gpu.path) {
 							continue
 						}
+
 						err = c.removeUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path)
 						if err != nil {
 							logger.Error("Failed to remove GPU device", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
@@ -4501,11 +4513,18 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 					sawNvidia = true
 				}
 
-				if sawNvidia && !shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+				if sawNvidia {
 					for _, gpu := range nvidiaDevices {
+						if shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+							if !gpu.isCard {
+								continue
+							}
+						}
+
 						if c.deviceExistsInDevicesFolder(k, gpu.path) {
 							continue
 						}
+
 						err = c.insertUnixDeviceNum(fmt.Sprintf("unix.%s", k), m, gpu.major, gpu.minor, gpu.path, false)
 						if err != nil {
 							logger.Error("Failed to insert GPU device", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
diff --git a/lxd/devices.go b/lxd/devices.go
index 9dae83a38..63a088121 100644
--- a/lxd/devices.go
+++ b/lxd/devices.go
@@ -69,9 +69,10 @@ type nvidiaGpuCards struct {
 
 // {/dev/nvidiactl, /dev/nvidia-uvm, ...}
 type nvidiaGpuDevices struct {
-	path  string
-	major int
-	minor int
+	isCard bool
+	path   string
+	major  int
+	minor  int
 }
 
 // /dev/dri/card0. If we detect that vendor == nvidia, then nvidia will contain
@@ -339,9 +340,10 @@ func deviceLoadGpu(all bool) ([]gpuDevice, []nvidiaGpuDevices, error) {
 				continue
 			}
 			tmpNividiaGpu := nvidiaGpuDevices{
-				path:  nvidiaPath,
-				major: shared.Major(stat.Rdev),
-				minor: shared.Minor(stat.Rdev),
+				isCard: !validNvidia.MatchString(nvidiaEnt.Name()),
+				path:   nvidiaPath,
+				major:  shared.Major(stat.Rdev),
+				minor:  shared.Minor(stat.Rdev),
 			}
 			nvidiaDevices = append(nvidiaDevices, tmpNividiaGpu)
 		}


More information about the lxc-devel mailing list