[lxc-devel] [lxd/master] Add support for NVIDIA runtime passthrough
stgraber on Github
lxc-bot at linuxcontainers.org
Thu Mar 29 18:03:32 UTC 2018
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 354 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180329/dcef123b/attachment.bin>
-------------- next part --------------
From 65cec7f2c884ea2490fe9376d10abe8aefa97b84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber at ubuntu.com>
Date: Thu, 29 Mar 2018 14:03:02 -0400
Subject: [PATCH] Add support for NVIDIA runtime passthrough
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Stéphane Graber <stgraber at ubuntu.com>
---
doc/api-extensions.md | 5 +++++
doc/containers.md | 2 ++
lxd/container_lxc.go | 33 +++++++++++++++++++++++++++++++++
scripts/bash/lxd-client | 2 +-
shared/container.go | 2 ++
shared/version/api.go | 1 +
6 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 7ece1b0f4..e5cd6e2ac 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -449,3 +449,8 @@ This adds a new `lifecycle` message type to the events API.
## storage\_api\_remote\_volume\_handling
This adds the ability to copy and move custom storage volumes between remote.
+
+## nvidia\_runtime
+Adds a `nvidia.runtime` config option for containers. Setting this to
+true will have the NVIDIA runtime and CUDA libraries passed to the
+container.
diff --git a/doc/containers.md b/doc/containers.md
index 42152f964..46d2af9fd 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -15,6 +15,7 @@ currently supported:
- `environment` (environment variables)
- `image` (copy of the image properties at time of creation)
- `limits` (resource limits)
+ - `nvidia` (NVIDIA and CUDA configuration)
- `raw` (raw container configuration overrides)
- `security` (security policies)
- `user` (storage for user properties, searchable)
@@ -45,6 +46,7 @@ linux.kernel\_modules | string | - | yes
migration.incremental.memory | boolean | false | yes | migration\_pre\_copy | Incremental memory transfer of the container's memory to reduce downtime.
migration.incremental.memory.goal | integer | 70 | yes | migration\_pre\_copy | Percentage of memory to have in sync before stopping the container.
migration.incremental.memory.iterations | integer | 10 | yes | migration\_pre\_copy | Maximum number of transfer operations to go through before stopping the container.
+nvidia.runtime | boolean | false | no | nvidia\_runtime | Pass the host NVIDIA and CUDA runtime libraries into the container
raw.apparmor | blob | - | yes | - | Apparmor profile entries to be appended to the generated profile
raw.idmap | blob | - | no | id\_map | Raw idmap configuration (e.g. "both 1000 1000")
raw.lxc | blob | - | no | - | Raw LXC configuration to be appended to the generated one
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index 8195b9da9..a0ea90b2b 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1165,6 +1165,39 @@ func (c *containerLXC) initLXC(config bool) error {
}
}
+ // Setup NVIDIA runtime
+ if shared.IsTrue(c.expandedConfig["nvidia.runtime"]) {
+ hookDir := os.Getenv("LXD_LXC_HOOK")
+ if hookDir == "" {
+ hookDir = "/usr/share/lxc/hooks"
+ }
+
+ hookPath := filepath.Join(hookDir, "nvidia")
+ if !shared.PathExists(hookPath) {
+ return fmt.Errorf("The NVIDIA LXC hook couldn't be found")
+ }
+
+ _, err := exec.LookPath("nvidia-container-cli")
+ if err != nil {
+ return fmt.Errorf("The NVIDIA container tools couldn't be found")
+ }
+
+ err = lxcSetConfigItem(cc, "lxc.environment", "NVIDIA_VISIBLE_DEVICES=none")
+ if err != nil {
+ return err
+ }
+
+ err = lxcSetConfigItem(cc, "lxc.environment", "NVIDIA_DRIVER_CAPABILITIES=compute,utility")
+ if err != nil {
+ return err
+ }
+
+ err = lxcSetConfigItem(cc, "lxc.hook.mount", hookPath)
+ if err != nil {
+ return err
+ }
+ }
+
// Memory limits
if c.state.OS.CGroupMemoryController {
memory := c.expandedConfig["limits.memory"]
diff --git a/scripts/bash/lxd-client b/scripts/bash/lxd-client
index 671c1b773..bc4d4a8cb 100644
--- a/scripts/bash/lxd-client
+++ b/scripts/bash/lxd-client
@@ -80,7 +80,7 @@ _have lxc && {
limits.disk.priority limits.memory limits.memory.enforce \
limits.memory.swap limits.memory.swap.priority limits.network.priority \
limits.processes linux.kernel_modules migration.incremental.memory \
- migration.incremental.memory.goal \
+ migration.incremental.memory.goal nvidia.runtime \
migration.incremental.memory.iterations raw.apparmor raw.idmap raw.lxc \
raw.seccomp security.idmap.base security.idmap.isolated \
security.idmap.size security.devlxd security.nesting security.privileged \
diff --git a/shared/container.go b/shared/container.go
index 3836b80f3..b6cfc7ada 100644
--- a/shared/container.go
+++ b/shared/container.go
@@ -206,6 +206,8 @@ var KnownContainerConfigKeys = map[string]func(value string) error{
"migration.incremental.memory.iterations": IsUint32,
"migration.incremental.memory.goal": IsUint32,
+ "nvidia.runtime": IsBool,
+
"security.nesting": IsBool,
"security.privileged": IsBool,
"security.devlxd": IsBool,
diff --git a/shared/version/api.go b/shared/version/api.go
index f6e718a42..bec41352f 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -101,6 +101,7 @@ var APIExtensions = []string{
"clustering",
"event_lifecycle",
"storage_api_remote_volume_handling",
+ "nvidia_runtime",
}
// APIExtensionsCount returns the number of available API extensions.
More information about the lxc-devel
mailing list