Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Training packaging pipeline for cuda12 #18524

Merged
merged 7 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
trigger: none

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

stages:
- template: templates/py-packaging-training-cuda-stage.yml
parameters:
build_py_parameters: --enable_training --update --build
torch_version: '2.1.0'
opset_version: '15'
cuda_version: '12.2'
cmake_cuda_architectures: 70;75;80;86;90
docker_file: Dockerfile.manylinux2_28_training_cuda12_2
agent_pool: Onnxruntime-Linux-GPU
upload_wheel: 'yes'
debug_build: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8
ARG POLICY=manylinux2014
ARG PLATFORM=x86_64
ARG DEVTOOLSET_ROOTPATH=
ARG LD_LIBRARY_PATH_ARG=
ARG PREPEND_PATH=

#We need both CUDA and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria:
#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and
#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only.
#So we use CUDA as the base image then add manylinux on top of it.

#Build manylinux2014 docker image begin
FROM $BASEIMAGE AS runtime_base
ARG POLICY
ARG PLATFORM
ARG DEVTOOLSET_ROOTPATH
ARG LD_LIBRARY_PATH_ARG
ARG PREPEND_PATH
LABEL maintainer="The ManyLinux project"

ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}
ENV PATH=${PREPEND_PATH}${PATH}
ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig

# first copy the fixup mirrors script, keep the script around
COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors

# setup entrypoint, this will wrap commands with `linux32` with i686 images
COPY build_scripts/install-entrypoint.sh \
build_scripts/build_utils.sh \
/build_scripts/

RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts
COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint
ENTRYPOINT ["manylinux-entrypoint"]

COPY build_scripts/install-runtime-packages.sh \
build_scripts/build_utils.sh \
/build_scripts/
RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/

COPY build_scripts/build_utils.sh /build_scripts/

COPY build_scripts/install-autoconf.sh /build_scripts/
RUN export AUTOCONF_ROOT=autoconf-2.71 && \
export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \
export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \
manylinux-entrypoint /build_scripts/install-autoconf.sh

COPY build_scripts/install-automake.sh /build_scripts/
RUN export AUTOMAKE_ROOT=automake-1.16.5 && \
export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \
export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \
manylinux-entrypoint /build_scripts/install-automake.sh

COPY build_scripts/install-libtool.sh /build_scripts/
RUN export LIBTOOL_ROOT=libtool-2.4.7 && \
export LIBTOOL_HASH=04e96c2404ea70c590c546eba4202a4e12722c640016c12b9b2f1ce3d481e9a8 && \
export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \
manylinux-entrypoint /build_scripts/install-libtool.sh

COPY build_scripts/install-libxcrypt.sh /build_scripts/
RUN export LIBXCRYPT_VERSION=4.4.28 && \
export LIBXCRYPT_HASH=db7e37901969cb1d1e8020cb73a991ef81e48e31ea5b76a101862c806426b457 && \
export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \
export PERL_ROOT=perl-5.34.0 && \
export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \
export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \
manylinux-entrypoint /build_scripts/install-libxcrypt.sh

FROM runtime_base AS build_base
COPY build_scripts/install-build-packages.sh /build_scripts/
RUN manylinux-entrypoint /build_scripts/install-build-packages.sh


FROM build_base AS build_git
COPY build_scripts/build-git.sh /build_scripts/
RUN export GIT_ROOT=git-2.36.2 && \
export GIT_HASH=6dc2cdea5fb23d823ba4871cc23222c1db31dfbb6d6c6ff74c4128700df57c68 && \
export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \
manylinux-entrypoint /build_scripts/build-git.sh


FROM build_base AS build_cpython
COPY build_scripts/build-sqlite3.sh /build_scripts/
RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3390200 && \
export SQLITE_AUTOCONF_HASH=852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de && \
export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2022 && \
manylinux-entrypoint /build_scripts/build-sqlite3.sh

COPY build_scripts/build-openssl.sh /build_scripts/
RUN export OPENSSL_ROOT=openssl-1.1.1q && \
export OPENSSL_HASH=d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca && \
export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \
manylinux-entrypoint /build_scripts/build-openssl.sh

COPY build_scripts/build-cpython.sh /build_scripts/


FROM build_cpython AS build_cpython38
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13


FROM build_cpython AS build_cpython39
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.13


FROM build_cpython AS build_cpython310
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.5

FROM build_cpython AS build_cpython311
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.11.2

FROM build_cpython AS all_python
COPY build_scripts/install-pypy.sh \
build_scripts/pypy.sha256 \
build_scripts/finalize-python.sh \
/build_scripts/
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9
COPY --from=build_cpython38 /opt/_internal /opt/_internal/
COPY --from=build_cpython39 /opt/_internal /opt/_internal/
COPY --from=build_cpython310 /opt/_internal /opt/_internal/
COPY --from=build_cpython311 /opt/_internal /opt/_internal/
RUN manylinux-entrypoint /build_scripts/finalize-python.sh


FROM runtime_base
COPY --from=build_git /manylinux-rootfs /
COPY --from=build_cpython /manylinux-rootfs /
COPY --from=all_python /opt/_internal /opt/_internal/
COPY build_scripts/finalize.sh \
build_scripts/python-tag-abi-tag.py \
build_scripts/requirements3.8.txt \
build_scripts/requirements3.9.txt \
build_scripts/requirements3.10.txt \
build_scripts/requirements3.11.txt \
build_scripts/requirements-base-tools.txt \
/build_scripts/
COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/
RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts

ENV SSL_CERT_FILE=/opt/_internal/certs.pem

CMD ["/bin/bash"]

#Build manylinux2014 docker image end
ARG PYTHON_VERSION=3.9
ARG TORCH_VERSION=2.1.0
ARG OPSET_VERSION=15
ARG INSTALL_DEPS_EXTRA_ARGS

#Add our own dependencies
ADD scripts /tmp/scripts
RUN cd /tmp/scripts && \
/tmp/scripts/manylinux/install_centos.sh && \
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
/tmp/scripts/install_rust.sh

ENV PATH="/root/.cargo/bin/:$PATH"

RUN /tmp/scripts/install_ninja.sh && \
/tmp/scripts/install_python_deps.sh -d gpu -v 12.2 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
rm -rf /tmp/scripts

ARG BUILD_UID=1001
ARG BUILD_USER=onnxruntimedev
RUN adduser --uid $BUILD_UID $BUILD_USER
WORKDIR /home/$BUILD_USER
USER $BUILD_USER
ENV PATH /usr/local/dotnet:$PATH
ENV ORTMODULE_ONNX_OPSET_VERSION=$OPSET_VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
--pre
-f https://download.pytorch.org/whl/torch_stable.html
torch==2.1.0+cu121
torchvision==0.16.0+cu121
torchtext==0.16.0
packaging==23.1
setuptools>=68.2.2
Loading