[lxc-devel] [lxc/master] hooks: add mount hook to configure access to NVIDIA GPUs

flx42 on Github lxc-bot at linuxcontainers.org
Fri Dec 8 21:51:03 UTC 2017


A non-text attachment was scrubbed...
Name: not available
Type: text/x-mailbox
Size: 717 bytes
Desc: not available
URL: <http://lists.linuxcontainers.org/pipermail/lxc-devel/attachments/20171208/07d3d2ae/attachment.bin>
-------------- next part --------------
From 13dfbc61783df8c617904aa8918033801fdab46d Mon Sep 17 00:00:00 2001
From: Felix Abecassis <fabecassis at nvidia.com>
Date: Fri, 8 Dec 2017 13:49:28 -0800
Subject: [PATCH] hooks: add mount hook to configure access to NVIDIA GPUs

This hook requires the nvidia-container-cli tool provided by libnvidia-container:
https://github.com/nvidia/libnvidia-container

For containers that have neither CUDA_VERSION nor NVIDIA_VISIBLE_DEVICES
set in the environment, the hook is a no-op.

To enable in the configuration file:
lxc.hook.mount = /usr/local/share/lxc/hooks/nvidia

Signed-off-by: Felix Abecassis <fabecassis at nvidia.com>
---
 hooks/Makefile.am |   3 +-
 hooks/nvidia      | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+), 1 deletion(-)
 create mode 100755 hooks/nvidia

diff --git a/hooks/Makefile.am b/hooks/Makefile.am
index b8b8f532d..90dd7d8c0 100644
--- a/hooks/Makefile.am
+++ b/hooks/Makefile.am
@@ -8,7 +8,8 @@ hooks_SCRIPTS = \
 	dhclient-script \
 	dhclient-start \
 	dhclient-stop \
-	squid-deb-proxy-client
+	squid-deb-proxy-client \
+	nvidia
 
 binhooks_PROGRAMS = \
 	unmount-namespace
diff --git a/hooks/nvidia b/hooks/nvidia
new file mode 100755
index 000000000..d53e7491f
--- /dev/null
+++ b/hooks/nvidia
@@ -0,0 +1,224 @@
+#! /bin/bash
+
+# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+
+set -eu
+
+if [ -z "${CUDA_VERSION+x}" ] && [ -z "${NVIDIA_VISIBLE_DEVICES+x}" ]; then
+    # Not a GPU container, nothing to do, exit early.
+    exit 0
+fi
+
+export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin
+if ! command -v nvidia-container-cli >/dev/null; then  # builtin lookup, no which(1) needed
+    echo "ERROR: Missing tool nvidia-container-cli, see https://github.com/NVIDIA/libnvidia-container" >&2
+    exit 1
+fi
+
+# Classify /proc/self/uid_map: prints "no", "userns-root" or "yes" on stdout.
+in_userns() {
+    [ -e /proc/self/uid_map ] || { echo no; return; }
+    while read -r map_inside map_outside map_length _; do
+        case "${map_inside} ${map_outside} ${map_length}" in
+            "0 0 4294967295") echo no; return;;
+            *" 0 1")          echo userns-root; return;;
+        esac
+    done < /proc/self/uid_map
+    [ "$(cat /proc/self/uid_map)" = "$(cat /proc/1/uid_map)" ] && { echo userns-root; return; }
+    echo yes
+}
+
+# Print the path of ldconfig.real if found in PATH, else of ldconfig; always returns 0.
+get_ldconfig() {
+    command -v "ldconfig.real" || command -v "ldconfig" || true
+}
+
+# Print the nvidia-container-cli option for driver capability $1; exit 1 if it is unknown.
+capability_to_cli() {
+    case "$1" in
+        compute|compat32|graphics|utility|video)
+            echo "--$1"
+            ;;
+        *)
+            exit 1
+            ;;
+    esac
+}
+
+# Accepts the same spellings as strconv.ParseBool in golang.
+# Prints "true" or "false"; exits with status 1 for any other value of $1.
+parse_bool() {
+    case "$1" in
+        1|[tT]|TRUE|true|True)    echo "true";;
+        0|[fF]|FALSE|false|False) echo "false";;
+        *)                        exit 1;;
+    esac
+}
+
+# Print the command-line help on stdout, one argument per output line; always returns 0.
+usage() {
+    printf '%s\n' \
+        "nvidia-container-cli hook for LXC" \
+        "" \
+        "Special arguments:" \
+        "[ -h | --help ]: Print this help message and exit." \
+        "" \
+        "Optional arguments:" \
+        "[ --no-load-kmods ]: Do not try to load the NVIDIA kernel modules." \
+        "[ --disable-require ]: Disable all the constraints of the form NVIDIA_REQUIRE_*." \
+        "[ --debug <path> ]: The path to the log file." \
+        "[ --ldconfig <path> ]: The path to the ldconfig binary, use a '@' prefix for a host path."
+    return 0
+}
+
+# Test getopt directly: under "set -e" a failed plain assignment would exit before usage runs.
+if ! options=$(getopt -o h -l help,no-load-kmods,disable-require,debug:,ldconfig: -- "$@"); then
+    usage
+    exit 1
+fi
+eval set -- "$options"  # replace the positional parameters with getopt's normalized list
+
+CLI_LOAD_KMODS="true"
+CLI_DISABLE_REQUIRE="false"
+CLI_DEBUG=
+CLI_LDCONFIG=
+
+while :; do
+    case "$1" in
+        -h|--help)          usage && exit 1;;
+        --no-load-kmods)    CLI_LOAD_KMODS="false"; shift 1;;
+        --disable-require)  CLI_DISABLE_REQUIRE="true"; shift 1;;
+        --debug)            CLI_DEBUG=$2; shift 2;;
+        --ldconfig)         CLI_LDCONFIG=$2; shift 2;;
+        --)                 shift 1; break;;
+        *)                  break;;
+    esac
+done
+
+if [ "$#" -lt 3 ] || [ "$2" != "lxc" ]; then
+    echo "ERROR: Not running through LXC." 1>&2
+    exit 1
+fi
+
+if [ "$3" != "mount" ]; then
+    echo "ERROR: This hook must be used as a \"mount\" hook." >&2
+    exit 1
+fi
+
+USERNS=$(in_userns)  # one of "no", "userns-root" or "yes"
+case "${USERNS}" in
+    yes)
+        CLI_LOAD_KMODS="false"
+        grep -q nvidia_uvm /proc/modules ||
+            echo "WARN: Kernel module nvidia_uvm is not loaded, nvidia-container-cli might fail. Make sure the NVIDIA device driver is installed and loaded." >&2
+        ;;
+    *)
+        # This is a limitation of libnvidia-container.
+        echo "FIXME: This hook currently only works in unprivileged mode." >&2
+        exit 1
+        ;;
+esac
+
+# https://github.com/nvidia/nvidia-container-runtime#nvidia_disable_require
+# The substitution is quoted: an unparsable value yields an empty string, which is just "not true".
+if [ -n "${NVIDIA_DISABLE_REQUIRE+x}" ] && [ "$(parse_bool "${NVIDIA_DISABLE_REQUIRE}")" = "true" ]; then
+    CLI_DISABLE_REQUIRE="true"
+fi
+
+# No --debug given: pick a log file at DEBUG/TRACE level; ":-" tolerates an unset LXC_LOG_LEVEL.
+if [ -z "${CLI_DEBUG}" ]; then
+    if [ "${LXC_LOG_LEVEL:-}" = "DEBUG" ] || [ "${LXC_LOG_LEVEL:-}" = "TRACE" ]; then
+        rootfs_path="${LXC_ROOTFS_PATH#*:}"
+        hookdir="${rootfs_path/%rootfs/hook}"
+        if mkdir -p "${hookdir}"; then
+            CLI_DEBUG="${hookdir}/nvidia.log"
+        fi
+    fi
+fi
+
+# A '@' prefix means a host path.
+if [ -z "${CLI_LDCONFIG}" ]; then
+    CLI_LDCONFIG="@$(get_ldconfig)"
+fi
+
+# https://github.com/nvidia/nvidia-container-runtime#nvidia_visible_devices
+# Left empty when NVIDIA_VISIBLE_DEVICES is unset.
+CLI_DEVICES="${NVIDIA_VISIBLE_DEVICES-}"
+
+# https://github.com/nvidia/nvidia-container-runtime#nvidia_driver_capabilities
+# The comma-separated list is turned into a space-separated one;
+# it is left empty when NVIDIA_DRIVER_CAPABILITIES is unset.
+CLI_CAPABILITIES="${NVIDIA_DRIVER_CAPABILITIES-}"
+CLI_CAPABILITIES="${CLI_CAPABILITIES//,/ }"
+
+# https://github.com/nvidia/nvidia-container-runtime#nvidia_require_
+# Append the value of every exported variable whose name starts with NVIDIA_REQUIRE_,
+# each one preceded by a space.
+CLI_REQUIREMENTS=
+for req in $(compgen -e "NVIDIA_REQUIRE_"); do
+    CLI_REQUIREMENTS+=" ${!req}"
+done
+
+# https://github.com/nvidia/nvidia-container-runtime#cuda_version
+# This branch applies when CUDA_VERSION is set but NVIDIA_REQUIRE_CUDA is not.
+if [[ -n "${CUDA_VERSION+x}" && -z "${NVIDIA_REQUIRE_CUDA+x}" ]]; then
+    # Legacy CUDA image detected, default to all devices and all driver capabilities.
+    # ":=" only assigns when the variable is still empty.
+    : "${CLI_DEVICES:=all}"
+    : "${CLI_CAPABILITIES:=all}"
+
+    # Transform CUDA_VERSION=X.Y to a "cuda>=X.Y" constraint for nvidia-container-cli.
+    # Only the leading X.Y is used; a value without that prefix adds no constraint.
+    if [[ "${CUDA_VERSION}" =~ ^[0-9]+\.[0-9]+ ]]; then
+        CLI_REQUIREMENTS+=" cuda>=${BASH_REMATCH[0]}"
+    fi
+fi
+
+# Expand the "all" shorthand into the explicit list of driver capabilities
+# understood by capability_to_cli.
+case "${CLI_CAPABILITIES}" in
+    all) CLI_CAPABILITIES="compute compat32 graphics utility video";;
+esac
+
+global_args=()     # options placed before the "configure" sub-command
+configure_args=()  # options placed after the "configure" sub-command
+
+if [ -n "${CLI_DEBUG}" ]; then
+    echo "INFO: Writing nvidia-container-cli log at ${CLI_DEBUG}." >&2
+    global_args+=("--debug=${CLI_DEBUG}")
+fi
+
+if [ "${CLI_LOAD_KMODS}" = "true" ]; then
+    global_args+=(--load-kmods)
+fi
+
+if [ "${USERNS}" = "yes" ]; then
+    global_args+=(--userspec)
+    configure_args+=(--no-cgroups)
+fi
+
+if [ -n "${CLI_LDCONFIG}" ]; then
+    configure_args+=(--ldconfig="${CLI_LDCONFIG}")
+fi
+
+if [ -n "${CLI_DEVICES}" ] && [ "${CLI_DEVICES}" != "none" ]; then
+    configure_args+=(--device="${CLI_DEVICES}")
+fi
+
+for cap in ${CLI_CAPABILITIES}; do  # intentionally unquoted: one word per capability
+    if ! arg=$(capability_to_cli "${cap}"); then
+        echo "ERROR: Unknown driver capability \"${cap}\"." >&2
+        exit 1
+    fi
+    configure_args+=("${arg}")
+done
+
+if [ "${CLI_DISABLE_REQUIRE}" = "false" ]; then
+    for req in ${CLI_REQUIREMENTS}; do  # intentionally unquoted: one word per constraint
+        configure_args+=(--require="${req}")
+    done
+fi
+
+# Quoted expansions keep each argument intact; ${a[@]+...} tolerates an empty array under "set -u".
+set -x
+exec nvidia-container-cli ${global_args[@]+"${global_args[@]}"} configure ${configure_args[@]+"${configure_args[@]}"} "${LXC_ROOTFS_MOUNT}"


More information about the lxc-devel mailing list