Skip to content

Commit

Permalink
Training packaging pipeline for cuda12 (microsoft#18524)
Browse files Browse the repository at this point in the history
### Description
<!-- Describe your changes. -->
Build ORT-training packaging pipeline for CUDA 12.2


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
This will help any customer using CUDA 12 and would not need to build
ORT-training from source

Test run:
https://dev.azure.com/aiinfra/Lotus/_build/results?buildId=382993&view=logs&s=130be951-c2f3-5601-5709-434b5e50ddb0
  • Loading branch information
ajindal1 authored and kleiti committed Mar 22, 2024
1 parent f5647cd commit 63de526
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
trigger: none

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

stages:
- template: templates/py-packaging-training-cuda-stage.yml
parameters:
build_py_parameters: --enable_training --update --build
torch_version: '2.1.0'
opset_version: '15'
cuda_version: '12.2'
cmake_cuda_architectures: 70;75;80;86;90
docker_file: Dockerfile.manylinux2_28_training_cuda12_2
agent_pool: Onnxruntime-Linux-GPU
upload_wheel: 'yes'
debug_build: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8
ARG POLICY=manylinux2014
ARG PLATFORM=x86_64
ARG DEVTOOLSET_ROOTPATH=
ARG LD_LIBRARY_PATH_ARG=
ARG PREPEND_PATH=

#We need both CUDA and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria:
#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and
#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only.
#So we use CUDA as the base image then add manylinux on top of it.

#Build manylinux2014 docker image begin
FROM $BASEIMAGE AS runtime_base
ARG POLICY
ARG PLATFORM
ARG DEVTOOLSET_ROOTPATH
ARG LD_LIBRARY_PATH_ARG
ARG PREPEND_PATH
LABEL maintainer="The ManyLinux project"

ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}
ENV PATH=${PREPEND_PATH}${PATH}
ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig

# first copy the fixup mirrors script, keep the script around
COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors

# setup entrypoint, this will wrap commands with `linux32` with i686 images
COPY build_scripts/install-entrypoint.sh \
build_scripts/build_utils.sh \
/build_scripts/

RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts
COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint
ENTRYPOINT ["manylinux-entrypoint"]

COPY build_scripts/install-runtime-packages.sh \
build_scripts/build_utils.sh \
/build_scripts/
RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/

COPY build_scripts/build_utils.sh /build_scripts/

COPY build_scripts/install-autoconf.sh /build_scripts/
RUN export AUTOCONF_ROOT=autoconf-2.71 && \
export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \
export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \
manylinux-entrypoint /build_scripts/install-autoconf.sh

COPY build_scripts/install-automake.sh /build_scripts/
RUN export AUTOMAKE_ROOT=automake-1.16.5 && \
export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \
export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \
manylinux-entrypoint /build_scripts/install-automake.sh

COPY build_scripts/install-libtool.sh /build_scripts/
RUN export LIBTOOL_ROOT=libtool-2.4.7 && \
export LIBTOOL_HASH=04e96c2404ea70c590c546eba4202a4e12722c640016c12b9b2f1ce3d481e9a8 && \
export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \
manylinux-entrypoint /build_scripts/install-libtool.sh

COPY build_scripts/install-libxcrypt.sh /build_scripts/
RUN export LIBXCRYPT_VERSION=4.4.28 && \
export LIBXCRYPT_HASH=db7e37901969cb1d1e8020cb73a991ef81e48e31ea5b76a101862c806426b457 && \
export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \
export PERL_ROOT=perl-5.34.0 && \
export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \
export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \
manylinux-entrypoint /build_scripts/install-libxcrypt.sh

FROM runtime_base AS build_base
COPY build_scripts/install-build-packages.sh /build_scripts/
RUN manylinux-entrypoint /build_scripts/install-build-packages.sh


FROM build_base AS build_git
COPY build_scripts/build-git.sh /build_scripts/
RUN export GIT_ROOT=git-2.36.2 && \
export GIT_HASH=6dc2cdea5fb23d823ba4871cc23222c1db31dfbb6d6c6ff74c4128700df57c68 && \
export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \
manylinux-entrypoint /build_scripts/build-git.sh


FROM build_base AS build_cpython
COPY build_scripts/build-sqlite3.sh /build_scripts/
RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3390200 && \
export SQLITE_AUTOCONF_HASH=852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de && \
export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2022 && \
manylinux-entrypoint /build_scripts/build-sqlite3.sh

COPY build_scripts/build-openssl.sh /build_scripts/
RUN export OPENSSL_ROOT=openssl-1.1.1q && \
export OPENSSL_HASH=d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca && \
export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \
manylinux-entrypoint /build_scripts/build-openssl.sh

COPY build_scripts/build-cpython.sh /build_scripts/


FROM build_cpython AS build_cpython38
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13


FROM build_cpython AS build_cpython39
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.13


FROM build_cpython AS build_cpython310
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.5

FROM build_cpython AS build_cpython311
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.11.2

FROM build_cpython AS all_python
COPY build_scripts/install-pypy.sh \
build_scripts/pypy.sha256 \
build_scripts/finalize-python.sh \
/build_scripts/
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9
COPY --from=build_cpython38 /opt/_internal /opt/_internal/
COPY --from=build_cpython39 /opt/_internal /opt/_internal/
COPY --from=build_cpython310 /opt/_internal /opt/_internal/
COPY --from=build_cpython311 /opt/_internal /opt/_internal/
RUN manylinux-entrypoint /build_scripts/finalize-python.sh


FROM runtime_base
COPY --from=build_git /manylinux-rootfs /
COPY --from=build_cpython /manylinux-rootfs /
COPY --from=all_python /opt/_internal /opt/_internal/
COPY build_scripts/finalize.sh \
build_scripts/python-tag-abi-tag.py \
build_scripts/requirements3.8.txt \
build_scripts/requirements3.9.txt \
build_scripts/requirements3.10.txt \
build_scripts/requirements3.11.txt \
build_scripts/requirements-base-tools.txt \
/build_scripts/
COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/
RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts

ENV SSL_CERT_FILE=/opt/_internal/certs.pem

CMD ["/bin/bash"]

#Build manylinux2014 docker image end
ARG PYTHON_VERSION=3.9
ARG TORCH_VERSION=2.1.0
ARG OPSET_VERSION=15
ARG INSTALL_DEPS_EXTRA_ARGS

#Add our own dependencies
ADD scripts /tmp/scripts
RUN cd /tmp/scripts && \
/tmp/scripts/manylinux/install_centos.sh && \
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
/tmp/scripts/install_rust.sh

ENV PATH="/root/.cargo/bin/:$PATH"

RUN /tmp/scripts/install_ninja.sh && \
/tmp/scripts/install_python_deps.sh -d gpu -v 12.2 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
rm -rf /tmp/scripts

ARG BUILD_UID=1001
ARG BUILD_USER=onnxruntimedev
RUN adduser --uid $BUILD_UID $BUILD_USER
WORKDIR /home/$BUILD_USER
USER $BUILD_USER
ENV PATH /usr/local/dotnet:$PATH
ENV ORTMODULE_ONNX_OPSET_VERSION=$OPSET_VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
--pre
-f https://download.pytorch.org/whl/torch_stable.html
torch==2.1.0+cu121
torchvision==0.16.0+cu121
torchtext==0.16.0
packaging==23.1
setuptools>=68.2.2

0 comments on commit 63de526

Please sign in to comment.