From 071b607807a9543709d1ffc7ccc66f1306999764 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Wed, 9 Oct 2024 12:06:33 -0700 Subject: [PATCH] [CUDA] Add CUDA_VERSION and CUDNN_VERSION etc. arguments to Dockerfile.cuda (#22351) ### Description * Add a few arguments CUDA_VERSION, CUDNN_VERSION, OS, GIT_COMMIT, GIT_BRANCH and ONNXRUNTIME_VERSION to the Dockerfile.cuda to allow for more flexibility in the build process. * Update README.md to include the new arguments and their usage. * Output labels to image so that it is easy to inspect the image. Available CUDA versions for ubuntu 24.04 can be found [here](https://hub.docker.com/r/nvidia/cuda/tags), and available CUDNN versions can be found [here](https://pypi.org/project/nvidia-cudnn-cu12/#history). Example command line to build docker image: ``` docker build -t onnxruntime-cuda --build-arg CUDA_VERSION=12.6.1 \ --build-arg CUDNN_VERSION=9.5.0.50 \ --build-arg GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ --build-arg GIT_COMMIT=$(git rev-parse HEAD) \ --build-arg ONNXRUNTIME_VERSION=$(cat ../VERSION_NUMBER) \ -f Dockerfile.cuda .. ``` Example labels from `docker inspect onnxruntime-cuda`: ``` "Labels": { "CUDA_VERSION": "12.6.1", "CUDNN_VERSION": "9.5.0.50", "maintainer": "Changming Sun ", "onnxruntime_git_branch": "main", "onnxruntime_git_commit": "bc84958dcef5c6017ae58085f55b669efd74f4a5", "onnxruntime_version": "1.20.0", "org.opencontainers.image.ref.name": "ubuntu", "org.opencontainers.image.version": "24.04" } ``` ### Motivation and Context https://github.com/microsoft/onnxruntime/pull/22339 has hard-coded the CUDA and cuDNN versions. Users might want to choose specific CUDA and cuDNN versions when building the docker image. 
--- dockerfiles/Dockerfile.cuda | 55 ++++++++++++++++++++++++++----------- dockerfiles/README.md | 21 ++++++++++++-- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/dockerfiles/Dockerfile.cuda b/dockerfiles/Dockerfile.cuda index b5701eea82c6c..d2d656648f2e7 100644 --- a/dockerfiles/Dockerfile.cuda +++ b/dockerfiles/Dockerfile.cuda @@ -2,16 +2,19 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------- -# Build onnxruntime-gpu python package with CUDA 12.6 & CUDNN 9.4 for python 3.12 in Ubuntu 24.04 for Nvidia GPU. +# Build onnxruntime-gpu python package with CUDA 12.x & CUDNN 9.x for python 3.12 in Ubuntu 24.04. # If memory is less than 64GB, you may change "--parallel" to "--parallel 4" to avoid out-of-memory error. -FROM nvcr.io/nvidia/cuda:12.6.1-devel-ubuntu24.04 +ARG CUDA_VERSION=12.6.1 +ARG CUDNN_VERSION=9.5.0.50 +ARG OS=ubuntu24.04 -# Target CUDA device with compute capability >= 6.1 +FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${OS} +ARG CUDA_VERSION +ARG CUDNN_VERSION ARG CMAKE_CUDA_ARCHITECTURES="61;70;75;80;86;90" ENV DEBIAN_FRONTEND=noninteractive -MAINTAINER Changming Sun "chasun@microsoft.com" # Add source code to /code ADD . /code @@ -34,9 +37,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && rm -rf /var/lib/apt/lists/* # Install CUDNN 9.4.0.58 for building ONNX Runtime with CUDA. 
-RUN wget https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.4.0.58_cuda12-archive.tar.xz \ +RUN cudnn_tar="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%%.*}-archive.tar.xz" \ + && wget "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_tar}" \ && mkdir -p /code/build/cudnn \ - && tar -Jxvf cudnn-linux-x86_64-9.4.0.58_cuda12-archive.tar.xz -C /code/build/cudnn --strip=1 + && tar -Jxvf ${cudnn_tar} -C /code/build/cudnn --strip=1 \ + && rm -f ${cudnn_tar} # Create a virtual environment and install dependencies, then build ONNX Runtime with CUDA support. RUN cd /code \ @@ -55,34 +60,52 @@ RUN cd /code \ --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER) "CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" onnxruntime_BUILD_UNIT_TESTS=OFF # Start second stage to copy the build artifacts -FROM nvcr.io/nvidia/cuda:12.6.1-runtime-ubuntu24.04 -ENV DEBIAN_FRONTEND=noninteractive +FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-runtime-${OS} +ARG CUDA_VERSION +ARG CUDNN_VERSION +ARG GIT_COMMIT +ARG GIT_BRANCH +ARG ONNXRUNTIME_VERSION + +# Make sure the required build arguments are set. See README.md for more information. +RUN test -n ${GIT_COMMIT:?} +RUN test -n ${GIT_BRANCH:?} +RUN test -n ${ONNXRUNTIME_VERSION:?} + +LABEL CUDA_VERSION="${CUDA_VERSION}" +LABEL CUDNN_VERSION="${CUDNN_VERSION}" +LABEL maintainer="Changming Sun " +LABEL onnxruntime_version="${ONNXRUNTIME_VERSION}" +LABEL onnxruntime_git_branch="${GIT_BRANCH}" +LABEL onnxruntime_git_commit="${GIT_COMMIT}" # Copy built wheel and license COPY --from=0 /code/build/Linux/Release/dist /ort COPY --from=0 /code/dockerfiles/LICENSE-IMAGE.txt /code/LICENSE-IMAGE.txt -# Set LD_LIBRARY_PATH so that runtime can load CUDA and CUDNN DLLs. -# CUDNN will be installed by nvidia-cudnn-cu12 python package later. 
-# Its location is in the site-packages directory, which can be retrieved like the following: -# python -c "import sysconfig; print(sysconfig.get_path('purelib'))" +# Set environment variables +ENV DEBIAN_FRONTEND=noninteractive +ENV CUDNN_VERSION=$CUDNN_VERSION +ENV ONNXRUNTIME_VERSION=$ONNXRUNTIME_VERSION +# CUDNN from nvidia-cudnn-cu12 python package is located in the site-packages directory of python virtual environment. ENV LD_LIBRARY_PATH="/ort/env/lib/python3.12/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64" -# Install runtime dependencies, and run a simple test to verify the installation. +# Install runtime dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ libstdc++6 \ ca-certificates \ python3-pip \ python3.12-venv \ - unattended-upgrades \ - && unattended-upgrade \ && python3 -m venv /ort/env \ && . /ort/env/bin/activate \ && pip install /ort/*.whl \ - && pip install nvidia-cudnn-cu12==9.4.0.58 \ + && pip install nvidia-cudnn-cu${CUDA_VERSION%%.*}==${CUDNN_VERSION} \ && python -c 'import onnxruntime; print(onnxruntime.get_available_providers())' \ && rm -rf /ort/*.whl \ && rm -rf /var/lib/apt/lists/* # Ensure the virtual environment is always activated when running commands in the container. RUN echo ". /ort/env/bin/activate" >> ~/.bashrc + +# Set the default command to start an interactive bash shell +CMD [ "/bin/bash" ] diff --git a/dockerfiles/README.md b/dockerfiles/README.md index 008587a01082b..7825940571769 100644 --- a/dockerfiles/README.md +++ b/dockerfiles/README.md @@ -40,18 +40,33 @@ The docker file supports both x86_64 and ARM64(aarch64). You may use docker's "- However, we cannot build the code for 32-bit ARM in such a way since a 32-bit compiler/linker might not have enough memory to generate the binaries. ## CUDA -**Ubuntu 22.04, CUDA 12.1, CuDNN 8** +**Ubuntu 24.04, CUDA 12.x, CuDNN 9.x** 1. Build the docker image from the Dockerfile in this repository. 
+ Choose available [cuda version](https://hub.docker.com/r/nvidia/cuda/tags) or [cudnn version](https://pypi.org/project/nvidia-cudnn-cu12/#history), then build docker image like the following: + ``` - docker build -t onnxruntime-cuda -f Dockerfile.cuda .. + git submodule update --init + docker build -t onnxruntime-cuda --build-arg CUDA_VERSION=12.6.1 \ + --build-arg CUDNN_VERSION=9.5.0.50 \ + --build-arg GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) \ + --build-arg GIT_COMMIT=$(git rev-parse HEAD) \ + --build-arg ONNXRUNTIME_VERSION=$(cat ../VERSION_NUMBER) \ + -f Dockerfile.cuda .. + ``` + To inspect the labels of the built image, run the following: + ``` + docker inspect onnxruntime-cuda + ``` 2. Run the Docker image ``` - docker run --gpus all -it onnxruntime-cuda + docker run --rm --gpus all -it onnxruntime-cuda + ``` or + ``` nvidia-docker run -it onnxruntime-cuda ```