[lxc-devel] [lxd/master] NVIDIA detection fallback

stgraber on Github lxc-bot at linuxcontainers.org
Sat Sep 14 10:59:07 UTC 2019


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190914/5098c09c/attachment.bin>
-------------- next part --------------
From 5253892f7241356bd2f8ab58bedbd0767b9b82a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 14 Sep 2019 12:58:02 +0200
Subject: [PATCH 1/2] shared/api: Make some NVIDIA fields omitempty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 shared/api/resource.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/shared/api/resource.go b/shared/api/resource.go
index eea1b86f6e..6590398848 100644
--- a/shared/api/resource.go
+++ b/shared/api/resource.go
@@ -119,13 +119,13 @@ type ResourcesGPUCardSRIOV struct {
 // ResourcesGPUCardNvidia represents additional information for NVIDIA GPUs
 // API extension: resources_gpu
 type ResourcesGPUCardNvidia struct {
-	CUDAVersion string `json:"cuda_version" yaml:"cuda_version"`
-	NVRMVersion string `json:"nvrm_version" yaml:"nvrm_version"`
+	CUDAVersion string `json:"cuda_version,omitempty" yaml:"cuda_version,omitempty"`
+	NVRMVersion string `json:"nvrm_version,omitempty" yaml:"nvrm_version,omitempty"`
 
 	Brand        string `json:"brand" yaml:"brand"`
 	Model        string `json:"model" yaml:"model"`
-	UUID         string `json:"uuid" yaml:"uuid"`
-	Architecture string `json:"architecture" yaml:"architecture"`
+	UUID         string `json:"uuid,omitempty" yaml:"uuid,omitempty"`
+	Architecture string `json:"architecture,omitempty" yaml:"architecture,omitempty"`
 
 	// API extension: resources_v2
 	CardName   string `json:"card_name" yaml:"card_name"`

From b40089ed83d765f4812e9d7409d8c6377ada9ce4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 14 Sep 2019 12:58:14 +0200
Subject: [PATCH 2/2] lxd/resources: Implement NVIDIA device fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #6195

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 lxd/resources/gpu.go | 67 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/lxd/resources/gpu.go b/lxd/resources/gpu.go
index f2963e6ece..a2d972d5e1 100644
--- a/lxd/resources/gpu.go
+++ b/lxd/resources/gpu.go
@@ -1,10 +1,12 @@
 package resources
 
 import (
+	"bufio"
 	"encoding/csv"
 	"fmt"
 	"io"
 	"io/ioutil"
+	"os"
 	"os/exec"
 	"path/filepath"
 	"strconv"
@@ -18,6 +20,66 @@ import (
 )
 
 var sysClassDrm = "/sys/class/drm"
+var procDriverNvidia = "/proc/driver/nvidia"
+
+func loadNvidiaProc() (map[string]*api.ResourcesGPUCardNvidia, error) { // Builds NVIDIA card records from the per-GPU "information" files under procDriverNvidia/gpus; returns an error if that directory is missing or unreadable
+	nvidiaCards := map[string]*api.ResourcesGPUCardNvidia{}
+
+	gpusPath := filepath.Join(procDriverNvidia, "gpus")
+	if !sysfsExists(gpusPath) { // sysfsExists is defined elsewhere; presumably a plain path-existence check
+		return nil, fmt.Errorf("No NVIDIA GPU proc driver")
+	}
+
+	// List the per-GPU entries found under the "gpus" directory
+	entries, err := ioutil.ReadDir(gpusPath)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Failed to list \"%s\"", gpusPath)
+	}
+
+	for _, entry := range entries {
+		entryName := entry.Name()
+		entryPath := filepath.Join(gpusPath, entryName)
+
+		if !sysfsExists(filepath.Join(entryPath, "information")) { // Entries without an "information" file are skipped, not treated as errors
+			continue
+		}
+
+		// Open this GPU's "information" file for line-by-line parsing
+		f, err := os.Open(filepath.Join(entryPath, "information"))
+		if err != nil {
+			return nil, errors.Wrapf(err, "Failed to open \"%s\"", filepath.Join(entryPath, "information"))
+		}
+		defer f.Close() // NOTE(review): deferred inside the loop, so every opened file stays open until the function returns
+
+		gpuInfo := bufio.NewScanner(f)
+		nvidiaCard := &api.ResourcesGPUCardNvidia{}
+		for gpuInfo.Scan() {
+			line := strings.TrimSpace(gpuInfo.Text())
+
+			fields := strings.SplitN(line, ":", 2) // Split on the first ":" only, so a value containing ":" stays intact
+			if len(fields) != 2 { // Lines without a ":" separator are ignored
+				continue
+			}
+
+			key := strings.TrimSpace(fields[0])
+			value := strings.TrimSpace(fields[1])
+
+			if key == "Model" {
+				nvidiaCard.Model = value
+				nvidiaCard.Brand = strings.Split(value, " ")[0] // Brand is the first space-separated word of the model string
+			}
+
+			if key == "Device Minor" {
+				nvidiaCard.CardName = fmt.Sprintf("nvidia%s", value) // Presumably the /dev node name (nvidia<minor>) - confirm against consumers of CardName
+				nvidiaCard.CardDevice = fmt.Sprintf("195:%s", value) // Presumably major:minor, with 195 as the NVIDIA character-device major - TODO confirm
+			}
+		} // NOTE(review): gpuInfo.Err() is never checked, so a read error silently yields a partially filled record
+
+		nvidiaCards[entryName] = nvidiaCard // Keyed by the directory entry name under "gpus"
+	}
+
+	return nvidiaCards, nil
+}
 
 func loadNvidiaContainer() (map[string]*api.ResourcesGPUCardNvidia, error) {
 	// Check for nvidia-container-cli
@@ -271,7 +333,10 @@ func GetGPU() (*api.ResourcesGPU, error) {
 	// Load NVIDIA information
 	nvidiaCards, err := loadNvidiaContainer()
 	if err != nil {
-		nvidiaCards = map[string]*api.ResourcesGPUCardNvidia{}
+		nvidiaCards, err = loadNvidiaProc()
+		if err != nil {
+			nvidiaCards = map[string]*api.ResourcesGPUCardNvidia{}
+		}
 	}
 
 	// Temporary variables


More information about the lxc-devel mailing list