From a131e70a67bfaf8f46fde293e3298b49f9937a62 Mon Sep 17 00:00:00 2001
From: Orti Bazar <orti.bazar@gmail.com>
Date: Fri, 29 Sep 2023 11:59:58 -0700
Subject: [PATCH] Add new dockerfile based on nvidia/cuda:12 (#546)

---
 .../docker/Dockerfile_ubuntu_2204_tf_cuda_12  | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 perfzero/docker/Dockerfile_ubuntu_2204_tf_cuda_12

diff --git a/perfzero/docker/Dockerfile_ubuntu_2204_tf_cuda_12 b/perfzero/docker/Dockerfile_ubuntu_2204_tf_cuda_12
new file mode 100644
index 00000000..af59accd
--- /dev/null
+++ b/perfzero/docker/Dockerfile_ubuntu_2204_tf_cuda_12
@@ -0,0 +1,113 @@
+# Ubuntu 22.04 Python3 with CUDA 11 and the following:
+#  - Installs tf-nightly-gpu (this is TF 2.3)
+#  - Installs requirements.txt for tensorflow/models
+
+FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 as base
+ARG tensorflow_pip_spec="tf-nightly"
+ARG local_tensorflow_pip_spec=""
+ARG extra_pip_specs=""
+ENV PIP_CMD="python3.11 -m pip"
+
+# setup.py passes the base path of local .whl file is chosen for the docker image.
+# Otherwise passes an empty existing file from the context.
+COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
+
+# Pick up some TF dependencies
+# cublas-dev and libcudnn7-dev only needed because of libnvinfer-dev which may not
+# really be needed.
+# In the future, add the following lines in a shell script running on the
+# benchmark vm to get the available dependent versions when updating cuda
+# version (e.g to 10.2 or something later):
+# sudo apt-cache search cuda-command-line-tool
+# sudo apt-cache search cuda-cublas
+# sudo apt-cache search cuda-cufft
+# sudo apt-cache search cuda-curand
+# sudo apt-cache search cuda-cusolver
+# sudo apt-cache search cuda-cusparse
+
+# Needed to disable prompts during installation.
+ENV DEBIAN_FRONTEND noninteractive
+
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        libfreetype6-dev \
+        libhdf5-serial-dev \
+        libzmq3-dev \
+        libpng-dev \
+        pkg-config \
+        software-properties-common \
+        unzip \
+        lsb-core \
+        curl
+
+# Python 3.11 related deps in this ppa.
+
+RUN add-apt-repository ppa:deadsnakes/ppa
+
+
+# Install / update Python and Python3
+RUN apt-get install -y --no-install-recommends \
+      python3.11 \
+      python3-pip \
+      python3.11-dev \
+      python3-setuptools \
+      python3.11-venv \
+      python3.11-distutils \
+      python3.11-lib2to3
+
+RUN python3.11 -m ensurepip --upgrade
+
+# Upgrade pip, need to use pip3 and then pip after this or an error
+# is thrown for no main found.
+RUN ${PIP_CMD} install --upgrade pip
+RUN ${PIP_CMD} install --upgrade distlib
+# setuptools upgraded to fix install requirements from model garden.
+RUN ${PIP_CMD} install --upgrade setuptools
+
+# For CUDA profiling, TensorFlow requires CUPTI.
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64:$LD_LIBRARY_PATH
+
+# See http://bugs.python.org/issue19846
+ENV LANG C.UTF-8
+
+# Add google-cloud-sdk to the source list
+# Note: use "cloud-sdk" instead of "cloud-sdk-$(lsb_release -c -s)" to resolve "'http://packages.cloud.google.com/apt cloud-sdk-focal Release' does not have a Release file
+RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
+RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
+
+# Install extras needed by most models
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      git \
+      ca-certificates \
+      wget \
+      htop \
+      zip \
+      google-cloud-sdk
+
+RUN ${PIP_CMD} install --upgrade pyyaml
+RUN ${PIP_CMD} install --upgrade google-api-python-client==1.8.0
+RUN ${PIP_CMD} install --upgrade google-cloud google-cloud-bigquery google-cloud-datastore mock
+
+
+RUN ${PIP_CMD} install wheel
+RUN ${PIP_CMD} install absl-py
+RUN ${PIP_CMD} install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
+
+RUN ${PIP_CMD} install tfds-nightly
+RUN ${PIP_CMD} install -U scikit-learn
+
+# Install dependnecies needed for tf.distribute test utils
+RUN ${PIP_CMD} install dill tblib portpicker
+
+RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/nightly_requirements.txt > /tmp/requirements.txt
+RUN ${PIP_CMD} install -r /tmp/requirements.txt
+
+RUN ${PIP_CMD} install tf-estimator-nightly
+RUN ${PIP_CMD} install tensorflow-text-nightly
+
+# RUN nvidia-smi
+
+RUN nvcc --version
+
+
+RUN ${PIP_CMD} freeze