[lxc-devel] [lxd/master] gpu: don't fail during parse

brauner on GitHub lxc-bot at linuxcontainers.org
Sat Jun 23 16:38:37 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 381 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180623/e699979f/attachment.bin>
-------------- next part --------------
From 2e2499317a6431efc1c03f334e51553af39a9f0a Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner at ubuntu.com>
Date: Sat, 23 Jun 2018 18:37:51 +0200
Subject: [PATCH] gpu: don't fail during parse

Closes #4680.

Signed-off-by: Christian Brauner <christian.brauner at ubuntu.com>
---
 lxd/container_lxc.go | 47 ++++++++++++++++++++++++++++-------------------
 lxd/devices.go       | 33 ++++++++++++++-------------------
 2 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 9b5fb05aa..111f0e853 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -2017,9 +2017,7 @@ func (c *containerLXC) startCommon() (string, error) {
 	c.removeProxyDevices()
 
 	var usbs []usbDevice
-	var gpus []gpuDevice
 	var sriov []string
-	var nvidiaDevices []nvidiaGpuDevices
 	diskDevices := map[string]types.Device{}
 
 	// Create the devices
@@ -2081,11 +2079,10 @@ func (c *containerLXC) startCommon() (string, error) {
 				}
 			}
 		} else if m["type"] == "gpu" {
-			if gpus == nil {
-				gpus, nvidiaDevices, err = deviceLoadGpu(deviceWantsAllGPUs(m))
-				if err != nil {
-					return "", err
-				}
+			allGpus := deviceWantsAllGPUs(m)
+			gpus, nvidiaDevices, err := deviceLoadGpu(allGpus)
+			if err != nil {
+				return "", err
 			}
 
 			sawNvidia := false
@@ -2114,6 +2111,10 @@ func (c *containerLXC) startCommon() (string, error) {
 					if err != nil {
 						return "", err
 					}
+				} else if !allGpus {
+					errMsg := fmt.Errorf("Failed to detect correct \"/dev/nvidia\" path")
+					logger.Errorf("%s", errMsg)
+					return "", errMsg
 				}
 
 				sawNvidia = true
@@ -4271,8 +4272,6 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 		}
 
 		var usbs []usbDevice
-		var gpus []gpuDevice
-		var nvidiaDevices []nvidiaGpuDevices
 
 		// Live update the devices
 		for k, m := range removeDevices {
@@ -4326,11 +4325,10 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 					}
 				}
 			} else if m["type"] == "gpu" {
-				if gpus == nil {
-					gpus, nvidiaDevices, err = deviceLoadGpu(deviceWantsAllGPUs(m))
-					if err != nil {
-						return err
-					}
+				allGpus := deviceWantsAllGPUs(m)
+				gpus, nvidiaDevices, err := deviceLoadGpu(allGpus)
+				if err != nil {
+					return err
 				}
 
 				for _, gpu := range gpus {
@@ -4357,6 +4355,10 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 							logger.Error("Failed to remove GPU device", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 							return err
 						}
+					} else if !allGpus {
+						errMsg := fmt.Errorf("Failed to detect correct \"/dev/nvidia\" path")
+						logger.Errorf("%s", errMsg)
+						return errMsg
 					}
 				}
 
@@ -4367,6 +4369,10 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 							nvidiaExists = true
 							break
 						}
+					} else if !allGpus {
+						errMsg := fmt.Errorf("Failed to detect correct \"/dev/nvidia\" path")
+						logger.Errorf("%s", errMsg)
+						return errMsg
 					}
 				}
 
@@ -4452,11 +4458,10 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 					}
 				}
 			} else if m["type"] == "gpu" {
-				if gpus == nil {
-					gpus, nvidiaDevices, err = deviceLoadGpu(deviceWantsAllGPUs(m))
-					if err != nil {
-						return err
-					}
+				allGpus := deviceWantsAllGPUs(m)
+				gpus, nvidiaDevices, err := deviceLoadGpu(allGpus)
+				if err != nil {
+					return err
 				}
 
 				sawNvidia := false
@@ -4487,6 +4492,10 @@ func (c *containerLXC) Update(args db.ContainerArgs, userRequested bool) error {
 							logger.Error("Failed to insert GPU device", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()})
 							return err
 						}
+					} else if !allGpus {
+						errMsg := fmt.Errorf("Failed to detect correct \"/dev/nvidia\" path")
+						logger.Errorf("%s", errMsg)
+						return errMsg
 					}
 
 					sawNvidia = true
diff --git a/lxd/devices.go b/lxd/devices.go
index b1fdeb46c..9dae83a38 100644
--- a/lxd/devices.go
+++ b/lxd/devices.go
@@ -276,27 +276,22 @@ func deviceLoadGpu(all bool) ([]gpuDevice, []nvidiaGpuDevices, error) {
 
 				if !all {
 					minor, err := findNvidiaMinor(tmpGpu.pci)
-					if err != nil {
-						if os.IsNotExist(err) {
-							continue
-						}
-						return nil, nil, err
-					}
-
-					nvidiaPath := "/dev/nvidia" + minor
-					stat := syscall.Stat_t{}
-					err = syscall.Stat(nvidiaPath, &stat)
-					if err != nil {
-						if os.IsNotExist(err) {
-							continue
+					if err == nil {
+						nvidiaPath := "/dev/nvidia" + minor
+						stat := syscall.Stat_t{}
+						err = syscall.Stat(nvidiaPath, &stat)
+						if err != nil {
+							if os.IsNotExist(err) {
+								continue
+							}
+
+							return nil, nil, err
 						}
-
-						return nil, nil, err
+						tmpGpu.nvidia.path = nvidiaPath
+						tmpGpu.nvidia.major = shared.Major(stat.Rdev)
+						tmpGpu.nvidia.minor = shared.Minor(stat.Rdev)
+						tmpGpu.nvidia.id = strconv.Itoa(tmpGpu.nvidia.minor)
 					}
-					tmpGpu.nvidia.path = nvidiaPath
-					tmpGpu.nvidia.major = shared.Major(stat.Rdev)
-					tmpGpu.nvidia.minor = shared.Minor(stat.Rdev)
-					tmpGpu.nvidia.id = strconv.Itoa(tmpGpu.nvidia.minor)
 				}
 			}
 


More information about the lxc-devel mailing list