Skip to content

Commit

Permalink
Merge pull request #2625 from ROCm/r2.14-add-ub22
Browse files Browse the repository at this point in the history
[r2.14-rocm-enhanced] add ub22; unify Python install scripts
  • Loading branch information
jayfurmanek authored Aug 14, 2024
2 parents 36a4c41 + 685d825 commit 4e7b1c1
Show file tree
Hide file tree
Showing 14 changed files with 310 additions and 236 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt

ARG GPU_DEVICE_TARGETS="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install ROCM
Expand All @@ -18,9 +18,9 @@ ARG ROCM_PATH=/opt/rocm-${ROCM_VERSION}
ENV ROCM_PATH=${ROCM_PATH}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.packages.rocm.cs7.sh /setup.packages.rocm.cs7.sh
COPY setup.rocm.cs7.sh /setup.rocm.cs7.sh
COPY setup.rocm.sh /setup.rocm.sh
COPY devel.packages.rocm.cs7.txt /devel.packages.rocm.cs7.txt
RUN /setup.rocm.cs7.sh $ROCM_VERSION
RUN /setup.rocm.sh $ROCM_VERSION el7

# Install various tools.
# - bats: bash unit testing framework
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt

ARG GPU_DEVICE_TARGETS="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install ROCM
Expand All @@ -26,9 +26,9 @@ ARG CUSTOM_INSTALL
ARG ROCM_PATH=/opt/rocm-${ROCM_VERSION}
ENV ROCM_PATH=${ROCM_PATH}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.rocm.el8.sh /setup.rocm.el8.sh
COPY setup.rocm.sh /setup.rocm.sh
COPY devel.packages.rocm.el8.txt /devel.packages.rocm.el8.txt
RUN /setup.rocm.el8.sh $ROCM_VERSION
RUN /setup.rocm.sh $ROCM_VERSION el8

# Install various tools.
# - bats: bash unit testing framework
Expand Down
55 changes: 0 additions & 55 deletions tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.rt

This file was deleted.

55 changes: 55 additions & 0 deletions tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.rt.ub20
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
################################################################################
# ROCm runtime image. The base image defaults to ubuntu:20.04 and can be
# overridden with --build-arg DISTRO_IMG=<image>.
ARG DISTRO_IMG
FROM ${DISTRO_IMG:-'ubuntu:20.04'} AS runtime
################################################################################

# Install dependencies
# The SLES package list is copied next to the default one; only
# runtime.packages.txt is passed to the setup script in this file.
COPY setup.packages.sh /setup.packages.sh
COPY runtime.packages.txt /runtime.packages.txt
COPY sles.runtime.packages.txt /sles.runtime.packages.txt
RUN /setup.packages.sh /runtime.packages.txt

# Install ROCM
# TF_PKGS_DIR and TENSORFLOW_PACKAGE locate the TensorFlow package inside the
# build context; it is copied here and pip-installed further down.
ARG TF_PKGS_DIR=tmp/packages
ARG TENSORFLOW_PACKAGE=tf_nightly_rocm
ARG ROCM_VERSION=6.1.2
# NOTE(review): CUSTOM_INSTALL has no default; confirm how the COPY below
# behaves when it is left unset.
ARG CUSTOM_INSTALL
# ROCM_PATH is derived from ROCM_VERSION and exported so it is also visible in
# running containers.
ARG ROCM_PATH=/opt/rocm-${ROCM_VERSION}
ENV ROCM_PATH=${ROCM_PATH}
COPY ${TF_PKGS_DIR}/${TENSORFLOW_PACKAGE} /${TF_PKGS_DIR}/${TENSORFLOW_PACKAGE}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.rocm.sh /setup.rocm.sh
COPY devel.packages.rocm.txt /devel.packages.rocm.txt
COPY sles.devel.packages.rocm.txt /sles.devel.packages.rocm.txt
# "focal" is the Ubuntu 20.04 codename; presumably setup.rocm.sh uses it to
# pick the matching ROCm package repository - verify against the script.
RUN /setup.rocm.sh $ROCM_VERSION focal

# All lines past this point are reset when $CACHEBUSTER is set. We need this
# for Python specifically because we install some nightly packages which are
# likely to change daily.
ARG CACHEBUSTER=0
RUN echo $CACHEBUSTER

# Setup Python environment. PYTHON_VERSION is e.g. "python3.8"
# NOTE(review): the meaning of the trailing "true" argument is defined by
# setup.python.sh, which is not visible here - confirm before changing it.
ARG PYTHON_VERSION
COPY setup.python.sh /setup.python.sh
COPY devel.requirements.txt /devel.requirements.txt
RUN /setup.python.sh $PYTHON_VERSION devel.requirements.txt true

# Setup ENV variables for tensorflow pip build
ENV TF_NEED_ROCM=1
ENV TF_ROCM_GCC=1
ENV ROCM_TOOLKIT_PATH=${ROCM_PATH}

# Install the TensorFlow package copied into the image above.
RUN pip install --no-cache-dir /${TF_PKGS_DIR}/${TENSORFLOW_PACKAGE}
# Grant every user passwordless sudo inside the container.
RUN echo 'ALL ALL=NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd

# TF_TESTING_FL is passed through from the build argument to the runtime
# environment unchanged.
ARG TF_TESTING_FL
ENV TF_TESTING_FL=${TF_TESTING_FL}
# Optional shell command, evaluated only when DWLD_TF_SRC_CMD is non-empty.
ARG DWLD_TF_SRC_CMD
RUN if [ -n "${DWLD_TF_SRC_CMD}" ]; then eval "${DWLD_TF_SRC_CMD}"; fi
# bazelisk v1.11.0 is installed under the name "bazel".
RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 -O /usr/local/bin/bazel && \
    chmod +x /usr/local/bin/bazel
# Clone TensorFlow companion repositories into the current working directory.
RUN git clone https://github.com/tensorflow/models.git
RUN git clone https://github.com/tensorflow/examples.git
RUN git clone https://github.com/tensorflow/autograph.git
RUN git clone https://github.com/tensorflow/benchmarks.git
55 changes: 55 additions & 0 deletions tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.rt.ub22
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
################################################################################
# ROCm runtime image. The base image defaults to ubuntu:22.04 and can be
# overridden with --build-arg DISTRO_IMG=<image>.
ARG DISTRO_IMG
FROM ${DISTRO_IMG:-'ubuntu:22.04'} AS runtime
################################################################################

# Install dependencies
# The SLES package list is copied next to the default one; only
# runtime.packages.txt is passed to the setup script in this file.
COPY setup.packages.sh /setup.packages.sh
COPY runtime.packages.txt /runtime.packages.txt
COPY sles.runtime.packages.txt /sles.runtime.packages.txt
RUN /setup.packages.sh /runtime.packages.txt

# Install ROCM
# TF_PKGS_DIR and TENSORFLOW_PACKAGE locate the TensorFlow package inside the
# build context; it is copied here and pip-installed further down.
ARG TF_PKGS_DIR=tmp/packages
ARG TENSORFLOW_PACKAGE=tf_nightly_rocm
ARG ROCM_VERSION=6.1.2
# NOTE(review): CUSTOM_INSTALL has no default; confirm how the COPY below
# behaves when it is left unset.
ARG CUSTOM_INSTALL
# ROCM_PATH is derived from ROCM_VERSION and exported so it is also visible in
# running containers.
ARG ROCM_PATH=/opt/rocm-${ROCM_VERSION}
ENV ROCM_PATH=${ROCM_PATH}
COPY ${TF_PKGS_DIR}/${TENSORFLOW_PACKAGE} /${TF_PKGS_DIR}/${TENSORFLOW_PACKAGE}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.rocm.sh /setup.rocm.sh
COPY devel.packages.rocm.txt /devel.packages.rocm.txt
COPY sles.devel.packages.rocm.txt /sles.devel.packages.rocm.txt
# "jammy" is the Ubuntu 22.04 codename; presumably setup.rocm.sh uses it to
# pick the matching ROCm package repository - verify against the script.
RUN /setup.rocm.sh $ROCM_VERSION jammy

# All lines past this point are reset when $CACHEBUSTER is set. We need this
# for Python specifically because we install some nightly packages which are
# likely to change daily.
ARG CACHEBUSTER=0
RUN echo $CACHEBUSTER

# Setup Python environment. PYTHON_VERSION is e.g. "python3.8"
# NOTE(review): the meaning of the trailing "true" argument is defined by
# setup.python.sh, which is not visible here - confirm before changing it.
ARG PYTHON_VERSION
COPY setup.python.sh /setup.python.sh
COPY devel.requirements.txt /devel.requirements.txt
RUN /setup.python.sh $PYTHON_VERSION devel.requirements.txt true

# Setup ENV variables for tensorflow pip build
ENV TF_NEED_ROCM=1
ENV TF_ROCM_GCC=1
ENV ROCM_TOOLKIT_PATH=${ROCM_PATH}

# Install the TensorFlow package copied into the image above.
RUN pip install --no-cache-dir /${TF_PKGS_DIR}/${TENSORFLOW_PACKAGE}
# Grant every user passwordless sudo inside the container.
RUN echo 'ALL ALL=NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd

# TF_TESTING_FL is passed through from the build argument to the runtime
# environment unchanged.
ARG TF_TESTING_FL
ENV TF_TESTING_FL=${TF_TESTING_FL}
# Optional shell command, evaluated only when DWLD_TF_SRC_CMD is non-empty.
ARG DWLD_TF_SRC_CMD
RUN if [ -n "${DWLD_TF_SRC_CMD}" ]; then eval "${DWLD_TF_SRC_CMD}"; fi
# bazelisk v1.11.0 is installed under the name "bazel".
RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 -O /usr/local/bin/bazel && \
    chmod +x /usr/local/bin/bazel
# Clone TensorFlow companion repositories into the current working directory.
RUN git clone https://github.com/tensorflow/models.git
RUN git clone https://github.com/tensorflow/examples.git
RUN git clone https://github.com/tensorflow/autograph.git
RUN git clone https://github.com/tensorflow/benchmarks.git
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:20.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand All @@ -18,7 +18,7 @@ ENV ROCM_PATH=${ROCM_PATH}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.rocm.sh /setup.rocm.sh
COPY devel.packages.rocm.txt /devel.packages.rocm.txt
RUN /setup.rocm.sh $ROCM_VERSION
RUN /setup.rocm.sh $ROCM_VERSION focal

# Install various tools.
# - bats: bash unit testing framework
Expand Down
72 changes: 72 additions & 0 deletions tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
################################################################################
# ROCm development/build image based on Ubuntu 22.04.
FROM ubuntu:22.04
################################################################################

# GPU architectures to target; overridable with --build-arg and exported to
# the container environment.
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
# The COPY destinations are relative, while the RUN line uses absolute /...
# paths, so this relies on the working directory being "/".
COPY setup.packages.sh setup.packages.sh
COPY builder.packages.txt builder.packages.txt
RUN /setup.packages.sh /builder.packages.txt

# Install ROCM
ARG ROCM_VERSION=6.1.2
# NOTE(review): CUSTOM_INSTALL has no default; confirm how the COPY below
# behaves when it is left unset.
ARG CUSTOM_INSTALL
# ROCM_PATH is derived from ROCM_VERSION and exported to the container
# environment.
ARG ROCM_PATH=/opt/rocm-${ROCM_VERSION}
ENV ROCM_PATH=${ROCM_PATH}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.rocm.sh /setup.rocm.sh
COPY devel.packages.rocm.txt /devel.packages.rocm.txt
# "jammy" is the Ubuntu 22.04 codename; presumably setup.rocm.sh uses it to
# pick the matching ROCm package repository - verify against the script.
RUN /setup.rocm.sh $ROCM_VERSION jammy

# Install various tools.
# - bats: bash unit testing framework
# - bazelisk: always use the correct bazel version
# - buildifier: clean bazel build deps
# - buildozer: clean bazel build deps
# - gcloud SDK: communicate with Google Cloud Platform (GCP) for RBE, CI
RUN git clone --branch v1.7.0 https://github.com/bats-core/bats-core.git && bats-core/install.sh /usr/local && rm -rf bats-core
RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 -O /usr/local/bin/bazel && chmod +x /usr/local/bin/bazel
RUN wget https://github.com/bazelbuild/buildtools/releases/download/3.5.0/buildifier -O /usr/local/bin/buildifier && chmod +x /usr/local/bin/buildifier
RUN wget https://github.com/bazelbuild/buildtools/releases/download/3.5.0/buildozer -O /usr/local/bin/buildozer && chmod +x /usr/local/bin/buildozer
# NOTE(review): the "~" in --install-dir=~/usr/local/bin is not at the start of
# a word, so the shell passes it to the installer literally; the SDK probably
# ends up under a directory named "~" rather than /usr/local/bin. Confirm the
# intended install path before relying on gcloud being on PATH.
RUN curl -sSL https://sdk.cloud.google.com > /tmp/gcloud && bash /tmp/gcloud --install-dir=~/usr/local/bin --disable-prompts


# All lines past this point are reset when $CACHEBUSTER is set. We need this
# for Python specifically because we install some nightly packages which are
# likely to change daily.
ARG CACHEBUSTER=0
RUN echo $CACHEBUSTER

# Setup Python environment. PYTHON_VERSION is e.g. "python3.8"
ARG PYTHON_VERSION
COPY setup.python.sh /setup.python.sh
COPY devel.requirements.txt /devel.requirements.txt
RUN /setup.python.sh $PYTHON_VERSION devel.requirements.txt

# Optionally pip-install a TensorFlow wheel when TF_WHEEL_URL is non-empty.
ARG TF_WHEEL_URL
RUN if [ -n "${TF_WHEEL_URL}" ]; then pip install "${TF_WHEEL_URL}"; fi
# Optional shell command, evaluated only when DWLD_TF_SRC_CMD is non-empty.
ARG DWLD_TF_SRC_CMD
RUN if [ -n "${DWLD_TF_SRC_CMD}" ]; then eval "${DWLD_TF_SRC_CMD}"; fi

# Setup build and environment
COPY devel.usertools /usertools
COPY devel.bashrc /root/.bashrc

# Setup ENV variables for tensorflow pip build
ENV TF_NEED_ROCM=1
ENV TF_ROCM_GCC=1
ENV ROCM_TOOLKIT_PATH=${ROCM_PATH}

# Don't use the bazel cache when a new docker image is created.
# The timestamp/random value is computed once, when this layer is built, and
# appended to the system-wide bazelrc.
RUN echo build --action_env=DOCKER_CACHEBUSTER=$(date +%s%N)$RANDOM >> /etc/bazel.bazelrc
RUN echo build --host_action_env=DOCKER_HOST_CACHEBUSTER=$(date +%s%N)$RANDOM >> /etc/bazel.bazelrc

# TF_TESTING_FL is passed through from the build argument to the container
# environment unchanged.
ARG TF_TESTING_FL
ENV TF_TESTING_FL=${TF_TESTING_FL}
# NOTE(review): DWLD_TF_SRC_CMD was already declared and evaluated earlier in
# this file, so a non-empty command runs twice - confirm this is intentional.
ARG DWLD_TF_SRC_CMD
RUN if [ -n "${DWLD_TF_SRC_CMD}" ]; then eval "${DWLD_TF_SRC_CMD}"; fi
# Optional script copied from the build context and run with bash when
# CLONE_TEST_REPO is non-empty.
# NOTE(review): confirm how the COPY behaves when CLONE_TEST_REPO is unset.
ARG CLONE_TEST_REPO
COPY ${CLONE_TEST_REPO} /${CLONE_TEST_REPO}
RUN if [ -n "${CLONE_TEST_REPO}" ]; then bash /${CLONE_TEST_REPO}; fi
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# limitations under the License.
# ==============================================================================
setup_file() {
cd /tf/tensorflow
bazel version # Start the bazel server
}

Expand Down Expand Up @@ -151,14 +150,11 @@ _test_lib$
//tensorflow:no_tensorflow_py_deps
//tensorflow/tools/pip_package:win_pip_package_marker
//tensorflow/core:image_testdata
//tensorflow/core/lib/lmdb:lmdb_testdata
//tensorflow/core/lib/lmdb/testdata:lmdb_testdata
//tensorflow/core/kernels/cloud:bigquery_reader_ops
//tensorflow/python:extra_py_tests_deps
//tensorflow/python:mixed_precision
//tensorflow/python:tf_optimizer
//tensorflow/python:compare_test_proto_py
//tensorflow/python/framework:test_ops_2
//tensorflow/python/framework:test_file_system.so
//tensorflow/python/debug:grpc_tensorflow_server.par
//tensorflow/python/feature_column:vocabulary_testdata
Expand Down Expand Up @@ -302,7 +298,7 @@ EOF
# anything with a Windows-only toolchain, and bazel errors if trying to build
# that directory.
@test "bazel nobuild passes on all of TF except TF Lite and win toolchains" {
bazel build --experimental_cc_shared_library --nobuild --keep_going -- //tensorflow/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/win/... -//tensorflow/tools/toolchains/win_1803/... -//tensorflow/compiler/xla/stream_executor/cuda/...
bazel build --experimental_cc_shared_library --nobuild --keep_going -- //tensorflow/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/win/... -//tensorflow/tools/toolchains/win_1803/...
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ test:nonpip_filters_multi_gpu --test_tag_filters=-no_gpu,-no_rocm
test:nonpip_filters_multi_gpu --build_tag_filters=-no_gpu,-no_rocm
test:nonpip_filters_multi_gpu --test_lang_filters=py --flaky_test_attempts=2 --test_size_filters=small,medium,large --test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048
test:nonpip_multi_gpu --config=nonpip_filters_multi_gpu -- \
//tensorflow/core/common_runtime/gpu:gpu_device_unified_memory_test_2gpu \
//tensorflow/core/nccl:nccl_manager_test_2gpu \
//tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \
//tensorflow/python/distribute:checkpoint_utils_test_2gpu \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ test:nonpip_filters_multi_gpu --test_tag_filters=-no_gpu,-no_rocm
test:nonpip_filters_multi_gpu --build_tag_filters=-no_gpu,-no_rocm
test:nonpip_filters_multi_gpu --test_lang_filters=py --flaky_test_attempts=2 --test_size_filters=small,medium,large --test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048
test:nonpip_multi_gpu --config=nonpip_filters_multi_gpu -- \
//tensorflow/core/common_runtime/gpu:gpu_device_unified_memory_test_2gpu \
//tensorflow/core/nccl:nccl_manager_test_2gpu \
//tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \
//tensorflow/python/distribute:checkpoint_utils_test_2gpu \
Expand Down
Loading

0 comments on commit 4e7b1c1

Please sign in to comment.