[lxc-devel] [lxd/master] Add support for NVIDIA runtime passthrough

stgraber on Github lxc-bot at linuxcontainers.org
Thu Mar 29 18:03:32 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 354 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180329/dcef123b/attachment.bin>
-------------- next part --------------
From 65cec7f2c884ea2490fe9376d10abe8aefa97b84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 29 Mar 2018 14:03:02 -0400
Subject: [PATCH] Add support for NVIDIA runtime passthrough
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
 doc/api-extensions.md   |  5 +++++
 doc/containers.md       |  2 ++
 lxd/container_lxc.go    | 33 +++++++++++++++++++++++++++++++++
 scripts/bash/lxd-client |  2 +-
 shared/container.go     |  2 ++
 shared/version/api.go   |  1 +
 6 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 7ece1b0f4..e5cd6e2ac 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -449,3 +449,8 @@ This adds a new `lifecycle` message type to the events API.
 
 ## storage\_api\_remote\_volume\_handling
 This adds the ability to copy and move custom storage volumes between remote.
+
+## nvidia\_runtime
+Adds a `nvidia.runtime` config option for containers. Setting this to
+true will have the NVIDIA runtime and CUDA libraries passed to the
+container.
diff --git a/doc/containers.md b/doc/containers.md
index 42152f964..46d2af9fd 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -15,6 +15,7 @@ currently supported:
  - `environment` (environment variables)
  - `image` (copy of the image properties at time of creation)
  - `limits` (resource limits)
+ - `nvidia` (NVIDIA and CUDA configuration)
  - `raw` (raw container configuration overrides)
  - `security` (security policies)
  - `user` (storage for user properties, searchable)
@@ -45,6 +46,7 @@ linux.kernel\_modules                   | string    | -             | yes
 migration.incremental.memory            | boolean   | false         | yes           | migration\_pre\_copy                 | Incremental memory transfer of the container's memory to reduce downtime.
 migration.incremental.memory.goal       | integer   | 70            | yes           | migration\_pre\_copy                 | Percentage of memory to have in sync before stopping the container.
 migration.incremental.memory.iterations | integer   | 10            | yes           | migration\_pre\_copy                 | Maximum number of transfer operations to go through before stopping the container.
+nvidia.runtime                          | boolean   | false         | no            | nvidia\_runtime                      | Pass the host NVIDIA and CUDA runtime libraries into the container
 raw.apparmor                            | blob      | -             | yes           | -                                    | Apparmor profile entries to be appended to the generated profile
 raw.idmap                               | blob      | -             | no            | id\_map                              | Raw idmap configuration (e.g. "both 1000 1000")
 raw.lxc                                 | blob      | -             | no            | -                                    | Raw LXC configuration to be appended to the generated one
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 8195b9da9..a0ea90b2b 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1165,6 +1165,39 @@ func (c *containerLXC) initLXC(config bool) error {
 		}
 	}
 
+	// Setup NVIDIA runtime
+	if shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+		hookDir := os.Getenv("LXD_LXC_HOOK")
+		if hookDir == "" {
+			hookDir = "/usr/share/lxc/hooks"
+		}
+
+		hookPath := filepath.Join(hookDir, "nvidia")
+		if !shared.PathExists(hookPath) {
+			return fmt.Errorf("The NVIDIA LXC hook couldn't be found")
+		}
+
+		_, err := exec.LookPath("nvidia-container-cli")
+		if err != nil {
+			return fmt.Errorf("The NVIDIA container tools couldn't be found")
+		}
+
+		err = lxcSetConfigItem(cc, "lxc.environment", "NVIDIA_VISIBLE_DEVICES=none")
+		if err != nil {
+			return err
+		}
+
+		err = lxcSetConfigItem(cc, "lxc.environment", "NVIDIA_DRIVER_CAPABILITIES=compute,utility")
+		if err != nil {
+			return err
+		}
+
+		err = lxcSetConfigItem(cc, "lxc.hook.mount", hookPath)
+		if err != nil {
+			return err
+		}
+	}
+
 	// Memory limits
 	if c.state.OS.CGroupMemoryController {
 		memory := c.expandedConfig["limits.memory"]
diff --git a/scripts/bash/lxd-client b/scripts/bash/lxd-client
index 671c1b773..bc4d4a8cb 100644
--- a/scripts/bash/lxd-client
+++ b/scripts/bash/lxd-client
@@ -80,7 +80,7 @@ _have lxc && {
       limits.disk.priority limits.memory limits.memory.enforce \
       limits.memory.swap limits.memory.swap.priority limits.network.priority \
       limits.processes linux.kernel_modules migration.incremental.memory \
-      migration.incremental.memory.goal \
+      migration.incremental.memory.goal nvidia.runtime \
       migration.incremental.memory.iterations raw.apparmor raw.idmap raw.lxc \
       raw.seccomp security.idmap.base security.idmap.isolated \
       security.idmap.size security.devlxd security.nesting security.privileged \
diff --git a/shared/container.go b/shared/container.go
index 3836b80f3..b6cfc7ada 100644
--- a/shared/container.go
+++ b/shared/container.go
@@ -206,6 +206,8 @@ var KnownContainerConfigKeys = map[string]func(value string) error{
 	"migration.incremental.memory.iterations": IsUint32,
 	"migration.incremental.memory.goal":       IsUint32,
 
+	"nvidia.runtime": IsBool,
+
 	"security.nesting":    IsBool,
 	"security.privileged": IsBool,
 	"security.devlxd":     IsBool,
diff --git a/shared/version/api.go b/shared/version/api.go
index f6e718a42..bec41352f 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -101,6 +101,7 @@ var APIExtensions = []string{
 	"clustering",
 	"event_lifecycle",
 	"storage_api_remote_volume_handling",
+	"nvidia_runtime",
 }
 
 // APIExtensionsCount returns the number of available API extensions.


More information about the lxc-devel mailing list