[lxc-devel] [lxd/master] NVIDIA detection fallback
stgraber on Github
lxc-bot at linuxcontainers.org
Sat Sep 14 10:59:07 UTC 2019
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 301 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20190914/5098c09c/attachment.bin>
-------------- next part --------------
From 5253892f7241356bd2f8ab58bedbd0767b9b82a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 14 Sep 2019 12:58:02 +0200
Subject: [PATCH 1/2] shared/api: Make some NVIDIA fields omitempty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
shared/api/resource.go | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/shared/api/resource.go b/shared/api/resource.go
index eea1b86f6e..6590398848 100644
--- a/shared/api/resource.go
+++ b/shared/api/resource.go
@@ -119,13 +119,13 @@ type ResourcesGPUCardSRIOV struct {
// ResourcesGPUCardNvidia represents additional information for NVIDIA GPUs
// API extension: resources_gpu
type ResourcesGPUCardNvidia struct {
- CUDAVersion string `json:"cuda_version" yaml:"cuda_version"`
- NVRMVersion string `json:"nvrm_version" yaml:"nvrm_version"`
+ CUDAVersion string `json:"cuda_version,omitempty" yaml:"cuda_version,omitempty"`
+ NVRMVersion string `json:"nvrm_version,omitempty" yaml:"nvrm_version,omitempty"`
Brand string `json:"brand" yaml:"brand"`
Model string `json:"model" yaml:"model"`
- UUID string `json:"uuid" yaml:"uuid"`
- Architecture string `json:"architecture" yaml:"architecture"`
+ UUID string `json:"uuid,omitempty" yaml:"uuid,omitempty"`
+ Architecture string `json:"architecture,omitempty" yaml:"architecture,omitempty"`
// API extension: resources_v2
CardName string `json:"card_name" yaml:"card_name"`
From b40089ed83d765f4812e9d7409d8c6377ada9ce4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Sat, 14 Sep 2019 12:58:14 +0200
Subject: [PATCH 2/2] lxd/resources: Implement NVIDIA device fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Closes #6195
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
lxd/resources/gpu.go | 67 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 66 insertions(+), 1 deletion(-)
diff --git a/lxd/resources/gpu.go b/lxd/resources/gpu.go
index f2963e6ece..a2d972d5e1 100644
--- a/lxd/resources/gpu.go
+++ b/lxd/resources/gpu.go
@@ -1,10 +1,12 @@
package resources
import (
+ "bufio"
"encoding/csv"
"fmt"
"io"
"io/ioutil"
+ "os"
"os/exec"
"path/filepath"
"strconv"
@@ -18,6 +20,66 @@ import (
)
var sysClassDrm = "/sys/class/drm"
+var procDriverNvidia = "/proc/driver/nvidia"
+
+// loadNvidiaProc builds the NVIDIA card map from the driver's proc interface
+// (procDriverNvidia + "/gpus"). GetGPU uses it as a fallback when
+// loadNvidiaContainer returns an error.
+//
+// The returned map is keyed by the name of each directory entry found under
+// the "gpus" directory (presumably the PCI address of the card; confirm
+// against the lookup done in GetGPU). Only Model, Brand, CardName and
+// CardDevice are filled in; the remaining fields are left at their zero value.
+//
+// An error is returned when the "gpus" directory is missing or cannot be
+// listed, or when an "information" file cannot be opened or read. Entries
+// without an "information" file are skipped.
+func loadNvidiaProc() (map[string]*api.ResourcesGPUCardNvidia, error) {
+	nvidiaCards := map[string]*api.ResourcesGPUCardNvidia{}
+
+	gpusPath := filepath.Join(procDriverNvidia, "gpus")
+	if !sysfsExists(gpusPath) {
+		return nil, fmt.Errorf("No NVIDIA GPU proc driver")
+	}
+
+	// List the GPUs from /proc
+	entries, err := ioutil.ReadDir(gpusPath)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Failed to list \"%s\"", gpusPath)
+	}
+
+	for _, entry := range entries {
+		entryName := entry.Name()
+		infoPath := filepath.Join(gpusPath, entryName, "information")
+
+		if !sysfsExists(infoPath) {
+			continue
+		}
+
+		// Parsing lives in a helper so that each file is closed as soon as
+		// it has been read, rather than staying open until this function
+		// returns (which is what a defer inside this loop would do).
+		nvidiaCard, err := parseNvidiaProcInformation(infoPath)
+		if err != nil {
+			return nil, err
+		}
+
+		nvidiaCards[entryName] = nvidiaCard
+	}
+
+	return nvidiaCards, nil
+}
+
+// parseNvidiaProcInformation reads a single "information" file made of
+// "Key: value" lines and returns the card described by it. Lines without a
+// colon and keys other than "Model" and "Device Minor" are ignored.
+func parseNvidiaProcInformation(path string) (*api.ResourcesGPUCardNvidia, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Failed to open \"%s\"", path)
+	}
+	defer f.Close()
+
+	nvidiaCard := &api.ResourcesGPUCardNvidia{}
+
+	gpuInfo := bufio.NewScanner(f)
+	for gpuInfo.Scan() {
+		fields := strings.SplitN(strings.TrimSpace(gpuInfo.Text()), ":", 2)
+		if len(fields) != 2 {
+			continue
+		}
+
+		key := strings.TrimSpace(fields[0])
+		value := strings.TrimSpace(fields[1])
+
+		switch key {
+		case "Model":
+			nvidiaCard.Model = value
+			// The brand is taken to be the first word of the model string.
+			nvidiaCard.Brand = strings.Split(value, " ")[0]
+		case "Device Minor":
+			nvidiaCard.CardName = fmt.Sprintf("nvidia%s", value)
+			// 195 is hard-coded as the device major number (presumably the
+			// NVIDIA character device major; confirm against the driver).
+			nvidiaCard.CardDevice = fmt.Sprintf("195:%s", value)
+		}
+	}
+
+	// Scan() returning false may mean a read error rather than EOF; surface
+	// it instead of returning a silently truncated card.
+	err = gpuInfo.Err()
+	if err != nil {
+		return nil, errors.Wrapf(err, "Failed to read \"%s\"", path)
+	}
+
+	return nvidiaCard, nil
+}
func loadNvidiaContainer() (map[string]*api.ResourcesGPUCardNvidia, error) {
// Check for nvidia-container-cli
@@ -271,7 +333,10 @@ func GetGPU() (*api.ResourcesGPU, error) {
// Load NVIDIA information
nvidiaCards, err := loadNvidiaContainer()
if err != nil {
- nvidiaCards = map[string]*api.ResourcesGPUCardNvidia{}
+ nvidiaCards, err = loadNvidiaProc()
+ if err != nil {
+ nvidiaCards = map[string]*api.ResourcesGPUCardNvidia{}
+ }
}
// Temporary variables
More information about the lxc-devel
mailing list