[lxc-devel] [lxc/master] hooks: change the semantics of NVIDIA_VISIBLE_DEVICES=""

flx42 on Github lxc-bot at linuxcontainers.org
Fri Feb 2 14:44:54 UTC 2018


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 790 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180202/0d282a43/attachment.bin>
-------------- next part --------------
From b87243830e3b5e95fa31a17cf1bfebe55353bf13 Mon Sep 17 00:00:00 2001
From: Felix Abecassis <fabecassis at nvidia.com>
Date: Fri, 2 Feb 2018 06:19:13 -0800
Subject: [PATCH] hooks: change the semantics of NVIDIA_VISIBLE_DEVICES=""

With LXC, you can override an environment variable with a null (empty
string) value, but you can't unset an existing variable.

The NVIDIA hook was previously activated when NVIDIA_VISIBLE_DEVICES
was set to null. As a result, it was not possible to disable the hook
by overriding the environment variable in the configuration.

The hook can now be disabled by setting NVIDIA_VISIBLE_DEVICES to
null (the empty string) or to the new special value "void".

Signed-off-by: Felix Abecassis <fabecassis at nvidia.com>
---
 hooks/nvidia | 53 +++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/hooks/nvidia b/hooks/nvidia
index 614c9e191..fbe05626c 100755
--- a/hooks/nvidia
+++ b/hooks/nvidia
@@ -4,11 +4,32 @@
 
 set -eu
 
-if [ -z "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
-    # Not a GPU container, nothing to do, exit early.
+# NVIDIA_VISIBLE_DEVICES="" *or* NVIDIA_VISIBLE_DEVICES="void"
+# GPU support was explicitly disabled, exit early.
+if [ -z "${NVIDIA_VISIBLE_DEVICES-x}" ] || [ "${NVIDIA_VISIBLE_DEVICES:-}" = "void" ]; then
     exit 0
 fi
 
+# https://github.com/nvidia/nvidia-container-runtime#cuda_version
+if [ -n "${CUDA_VERSION:-}" ] && [ -z "${NVIDIA_REQUIRE_CUDA:-}" ]; then
+    # Legacy CUDA image: default to all devices and all driver capabilities.
+    if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
+	NVIDIA_VISIBLE_DEVICES="all"
+    fi
+    if [ -z "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
+	NVIDIA_DRIVER_CAPABILITIES="all"
+    fi
+    if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
+        NVIDIA_REQUIRE_CUDA="cuda>=${BASH_REMATCH[0]}"
+    fi
+else
+    # NVIDIA_VISIBLE_DEVICES unset and it's not a legacy CUDA image.
+    # This is not a GPU image, exit early.
+    if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
+	exit 0
+    fi
+fi
+
 export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
 if ! which nvidia-container-cli >/dev/null; then
     echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
@@ -128,7 +149,7 @@ if [ "${USERNS}" = "yes" ]; then
 fi
 
 # https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
-if [ -n "${NVIDIA_DISABLE_REQUIRE+x}" ]; then
+if [ -n "${NVIDIA_DISABLE_REQUIRE:-}" ]; then
     if [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
         CLI_DISABLE_REQUIRE="true"
     fi
@@ -152,15 +173,12 @@ if [ -z "${CLI_LDCONFIG}" ]; then
 fi
 
 # https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
-CLI_DEVICES=
-if [ -n "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
-    CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"
-fi
+CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"
 
 # https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
 CLI_CAPABILITIES=
-if [ -n "${NVIDIA_DRIVER_CAPABILITIES+x}" ]; then
-    CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }"
+if [ -n "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
+     CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }"
 fi
 
 # https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
@@ -169,23 +187,6 @@ for req in $(compgen -e "NVIDIA_REQUIRE_"); do
     CLI_REQUIREMENTS="${CLI_REQUIREMENTS} ${!req}"
 done
 
-# https://github.com/nvidia/nvidia-container-runtime#cuda_version
-if [ -n "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_REQUIRE_CUDA+x}" ]; then
-    # Legacy CUDA image detected, default to all devices and all driver capabilities.
-    if [ -z "${CLI_DEVICES}" ]; then
-        CLI_DEVICES="all"
-    fi
-
-    if [ -z "${CLI_CAPABILITIES}" ]; then
-        CLI_CAPABILITIES="all"
-    fi
-
-    # Transform CUDA_VERSION=X.Y to a "cuda>=X.Y" constraint for nvidia-container-cli.
-    if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
-        CLI_REQUIREMENTS="${CLI_REQUIREMENTS} cuda>=${BASH_REMATCH[0]}"
-    fi
-fi
-
 if [ "${CLI_CAPABILITIES}" = "all" ]; then
     CLI_CAPABILITIES="compute compat32 graphics utility video"
 fi


More information about the lxc-devel mailing list