-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Training packaging pipeline for cuda12 (#18524)
### Description <!-- Describe your changes. --> Build ORT-training packaging pipeline for CUDA 12.2 ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> This will help any customer using CUDA 12 and would not need to build ORT-training from source Test run: https://dev.azure.com/aiinfra/Lotus/_build/results?buildId=382993&view=logs&s=130be951-c2f3-5601-5709-434b5e50ddb0
- Loading branch information
Showing
3 changed files
with
209 additions
and
0 deletions.
There are no files selected for viewing
22 changes: 22 additions & 0 deletions
22
tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
trigger: none | ||
|
||
resources: | ||
repositories: | ||
- repository: manylinux | ||
type: Github | ||
endpoint: Microsoft | ||
name: pypa/manylinux | ||
ref: 5eda9aded5462201e6310105728d33016e637ea7 | ||
|
||
stages: | ||
- template: templates/py-packaging-training-cuda-stage.yml | ||
parameters: | ||
build_py_parameters: --enable_training --update --build | ||
torch_version: '2.1.0' | ||
opset_version: '15' | ||
cuda_version: '12.2' | ||
cmake_cuda_architectures: 70;75;80;86;90 | ||
docker_file: Dockerfile.manylinux2_28_training_cuda12_2 | ||
agent_pool: Onnxruntime-Linux-GPU | ||
upload_wheel: 'yes' | ||
debug_build: false |
180 changes: 180 additions & 0 deletions
180
tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8 | ||
ARG POLICY=manylinux2014 | ||
ARG PLATFORM=x86_64 | ||
ARG DEVTOOLSET_ROOTPATH= | ||
ARG LD_LIBRARY_PATH_ARG= | ||
ARG PREPEND_PATH= | ||
|
||
#We need both CUDA and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria: | ||
#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and | ||
#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only. | ||
#So we use CUDA as the base image then add manylinux on top of it. | ||
|
||
#Build manylinux2014 docker image begin | ||
FROM $BASEIMAGE AS runtime_base | ||
ARG POLICY | ||
ARG PLATFORM | ||
ARG DEVTOOLSET_ROOTPATH | ||
ARG LD_LIBRARY_PATH_ARG | ||
ARG PREPEND_PATH | ||
LABEL maintainer="The ManyLinux project" | ||
|
||
ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM} | ||
ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8 | ||
ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH} | ||
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG} | ||
ENV PATH=${PREPEND_PATH}${PATH} | ||
ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig | ||
|
||
# first copy the fixup mirrors script, keep the script around | ||
COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors | ||
|
||
# setup entrypoint, this will wrap commands with `linux32` with i686 images | ||
COPY build_scripts/install-entrypoint.sh \ | ||
build_scripts/build_utils.sh \ | ||
/build_scripts/ | ||
|
||
RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts | ||
COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint | ||
ENTRYPOINT ["manylinux-entrypoint"] | ||
|
||
COPY build_scripts/install-runtime-packages.sh \ | ||
build_scripts/build_utils.sh \ | ||
/build_scripts/ | ||
RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/ | ||
|
||
COPY build_scripts/build_utils.sh /build_scripts/ | ||
|
||
COPY build_scripts/install-autoconf.sh /build_scripts/ | ||
RUN export AUTOCONF_ROOT=autoconf-2.71 && \ | ||
export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \ | ||
export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \ | ||
manylinux-entrypoint /build_scripts/install-autoconf.sh | ||
|
||
COPY build_scripts/install-automake.sh /build_scripts/ | ||
RUN export AUTOMAKE_ROOT=automake-1.16.5 && \ | ||
export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \ | ||
export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \ | ||
manylinux-entrypoint /build_scripts/install-automake.sh | ||
|
||
COPY build_scripts/install-libtool.sh /build_scripts/ | ||
RUN export LIBTOOL_ROOT=libtool-2.4.7 && \ | ||
export LIBTOOL_HASH=04e96c2404ea70c590c546eba4202a4e12722c640016c12b9b2f1ce3d481e9a8 && \ | ||
export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \ | ||
manylinux-entrypoint /build_scripts/install-libtool.sh | ||
|
||
COPY build_scripts/install-libxcrypt.sh /build_scripts/ | ||
RUN export LIBXCRYPT_VERSION=4.4.28 && \ | ||
export LIBXCRYPT_HASH=db7e37901969cb1d1e8020cb73a991ef81e48e31ea5b76a101862c806426b457 && \ | ||
export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \ | ||
export PERL_ROOT=perl-5.34.0 && \ | ||
export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \ | ||
export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \ | ||
manylinux-entrypoint /build_scripts/install-libxcrypt.sh | ||
|
||
FROM runtime_base AS build_base | ||
COPY build_scripts/install-build-packages.sh /build_scripts/ | ||
RUN manylinux-entrypoint /build_scripts/install-build-packages.sh | ||
|
||
|
||
FROM build_base AS build_git | ||
COPY build_scripts/build-git.sh /build_scripts/ | ||
RUN export GIT_ROOT=git-2.36.2 && \ | ||
export GIT_HASH=6dc2cdea5fb23d823ba4871cc23222c1db31dfbb6d6c6ff74c4128700df57c68 && \ | ||
export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \ | ||
manylinux-entrypoint /build_scripts/build-git.sh | ||
|
||
|
||
FROM build_base AS build_cpython | ||
COPY build_scripts/build-sqlite3.sh /build_scripts/ | ||
RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3390200 && \ | ||
export SQLITE_AUTOCONF_HASH=852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de && \ | ||
export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2022 && \ | ||
manylinux-entrypoint /build_scripts/build-sqlite3.sh | ||
|
||
COPY build_scripts/build-openssl.sh /build_scripts/ | ||
RUN export OPENSSL_ROOT=openssl-1.1.1q && \ | ||
export OPENSSL_HASH=d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca && \ | ||
export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \ | ||
manylinux-entrypoint /build_scripts/build-openssl.sh | ||
|
||
COPY build_scripts/build-cpython.sh /build_scripts/ | ||
|
||
|
||
FROM build_cpython AS build_cpython38 | ||
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt | ||
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13 | ||
|
||
|
||
FROM build_cpython AS build_cpython39 | ||
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt | ||
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.13 | ||
|
||
|
||
FROM build_cpython AS build_cpython310 | ||
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt | ||
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.5 | ||
|
||
FROM build_cpython AS build_cpython311 | ||
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt | ||
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.11.2 | ||
|
||
FROM build_cpython AS all_python | ||
COPY build_scripts/install-pypy.sh \ | ||
build_scripts/pypy.sha256 \ | ||
build_scripts/finalize-python.sh \ | ||
/build_scripts/ | ||
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9 | ||
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9 | ||
COPY --from=build_cpython38 /opt/_internal /opt/_internal/ | ||
COPY --from=build_cpython39 /opt/_internal /opt/_internal/ | ||
COPY --from=build_cpython310 /opt/_internal /opt/_internal/ | ||
COPY --from=build_cpython311 /opt/_internal /opt/_internal/ | ||
RUN manylinux-entrypoint /build_scripts/finalize-python.sh | ||
|
||
|
||
FROM runtime_base | ||
COPY --from=build_git /manylinux-rootfs / | ||
COPY --from=build_cpython /manylinux-rootfs / | ||
COPY --from=all_python /opt/_internal /opt/_internal/ | ||
COPY build_scripts/finalize.sh \ | ||
build_scripts/python-tag-abi-tag.py \ | ||
build_scripts/requirements3.8.txt \ | ||
build_scripts/requirements3.9.txt \ | ||
build_scripts/requirements3.10.txt \ | ||
build_scripts/requirements3.11.txt \ | ||
build_scripts/requirements-base-tools.txt \ | ||
/build_scripts/ | ||
COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/ | ||
RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts | ||
|
||
ENV SSL_CERT_FILE=/opt/_internal/certs.pem | ||
|
||
CMD ["/bin/bash"] | ||
|
||
#Build manylinux2014 docker image end | ||
ARG PYTHON_VERSION=3.9 | ||
ARG TORCH_VERSION=2.1.0 | ||
ARG OPSET_VERSION=15 | ||
ARG INSTALL_DEPS_EXTRA_ARGS | ||
|
||
#Add our own dependencies | ||
ADD scripts /tmp/scripts | ||
RUN cd /tmp/scripts && \ | ||
/tmp/scripts/manylinux/install_centos.sh && \ | ||
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \ | ||
/tmp/scripts/install_rust.sh | ||
|
||
ENV PATH="/root/.cargo/bin/:$PATH" | ||
|
||
RUN /tmp/scripts/install_ninja.sh && \ | ||
/tmp/scripts/install_python_deps.sh -d gpu -v 12.2 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \ | ||
rm -rf /tmp/scripts | ||
|
||
ARG BUILD_UID=1001 | ||
ARG BUILD_USER=onnxruntimedev | ||
RUN adduser --uid $BUILD_UID $BUILD_USER | ||
WORKDIR /home/$BUILD_USER | ||
USER $BUILD_USER | ||
ENV PATH /usr/local/dotnet:$PATH | ||
ENV ORTMODULE_ONNX_OPSET_VERSION=$OPSET_VERSION |
7 changes: 7 additions & 0 deletions
7
.../docker/scripts/training/ortmodule/stage1/requirements_torch2.1.0_cu12.2/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--pre | ||
-f https://download.pytorch.org/whl/torch_stable.html | ||
torch==2.1.0+cu121 | ||
torchvision==0.16.0+cu121 | ||
torchtext==0.16.0 | ||
packaging==23.1 | ||
setuptools>=68.2.2 |