[lxc-devel] [lxc/master] hooks: change the semantic of NVIDIA_VISIBLE_DEVICES=""
flx42 on Github
lxc-bot at linuxcontainers.org
Fri Feb 2 14:44:54 UTC 2018
A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 790 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20180202/0d282a43/attachment.bin>
-------------- next part --------------
From b87243830e3b5e95fa31a17cf1bfebe55353bf13 Mon Sep 17 00:00:00 2001
From: Felix Abecassis <fabecassis at nvidia.com>
Date: Fri, 2 Feb 2018 06:19:13 -0800
Subject: [PATCH] hooks: change the semantic of NVIDIA_VISIBLE_DEVICES=""
With LXC, you can override the value of an environment variable to
null (the empty string), but you can't unset an existing variable.
The NVIDIA hook was previously activated when NVIDIA_VISIBLE_DEVICES
was set to null. As a result, it was not possible to disable the hook
by overriding the environment variable in the configuration.
The hook can now be disabled by setting NVIDIA_VISIBLE_DEVICES to
null or to the new special value "void".
Signed-off-by: Felix Abecassis <fabecassis at nvidia.com>
---
hooks/nvidia | 53 +++++++++++++++++++++++++++--------------------------
1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/hooks/nvidia b/hooks/nvidia
index 614c9e191..fbe05626c 100755
--- a/hooks/nvidia
+++ b/hooks/nvidia
@@ -4,11 +4,32 @@
set -eu
-if [ -z "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
- # Not a GPU container, nothing to do, exit early.
+# NVIDIA_VISIBLE_DEVICES="" *or* NVIDIA_VISIBLE_DEVICES="void"
+# GPU support was explicitly disabled, exit early.
+if [ -z "${NVIDIA_VISIBLE_DEVICES-x}" ] || [ "${NVIDIA_VISIBLE_DEVICES:-}" = "void" ]; then
exit 0
fi
+# https://github.com/nvidia/nvidia-container-runtime#cuda_version
+if [ -n "${CUDA_VERSION:-}" ] && [ -z "${NVIDIA_REQUIRE_CUDA:-}" ]; then
+ # Legacy CUDA image: default to all devices and all driver capabilities.
+ if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
+ NVIDIA_VISIBLE_DEVICES="all"
+ fi
+ if [ -z "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
+ NVIDIA_DRIVER_CAPABILITIES="all"
+ fi
+ if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
+ NVIDIA_REQUIRE_CUDA="cuda>=${BASH_REMATCH[0]}"
+ fi
+else
+ # NVIDIA_VISIBLE_DEVICES unset and it's not a legacy CUDA image.
+ # This is not a GPU image, exit early.
+ if [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
+ exit 0
+ fi
+fi
+
export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
if ! which nvidia-container-cli >/dev/null; then
echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
@@ -128,7 +149,7 @@ if [ "${USERNS}" = "yes" ]; then
fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
-if [ -n "${NVIDIA_DISABLE_REQUIRE+x}" ]; then
+if [ -n "${NVIDIA_DISABLE_REQUIRE:-}" ]; then
if [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
CLI_DISABLE_REQUIRE="true"
fi
@@ -152,15 +173,12 @@ if [ -z "${CLI_LDCONFIG}" ]; then
fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
-CLI_DEVICES=
-if [ -n "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
- CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"
-fi
+CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES}"
# https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
CLI_CAPABILITIES=
-if [ -n "${NVIDIA_DRIVER_CAPABILITIES+x}" ]; then
- CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }"
+if [ -n "${NVIDIA_DRIVER_CAPABILITIES:-}" ]; then
+ CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES//,/ }"
fi
# https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
@@ -169,23 +187,6 @@ for req in $(compgen -e "NVIDIA_REQUIRE_"); do
CLI_REQUIREMENTS="${CLI_REQUIREMENTS} ${!req}"
done
-# https://github.com/nvidia/nvidia-container-runtime#cuda_version
-if [ -n "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_REQUIRE_CUDA+x}" ]; then
- # Legacy CUDA image detected, default to all devices and all driver capabilities.
- if [ -z "${CLI_DEVICES}" ]; then
- CLI_DEVICES="all"
- fi
-
- if [ -z "${CLI_CAPABILITIES}" ]; then
- CLI_CAPABILITIES="all"
- fi
-
- # Transform CUDA_VERSION=X.Y to a "cuda>=X.Y" constraint for nvidia-container-cli.
- if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
- CLI_REQUIREMENTS="${CLI_REQUIREMENTS} cuda>=${BASH_REMATCH[0]}"
- fi
-fi
-
if [ "${CLI_CAPABILITIES}" = "all" ]; then
CLI_CAPABILITIES="compute compat32 graphics utility video"
fi
More information about the lxc-devel mailing list