Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ROCm] add manylinux build test for ROCm CI #17621

Merged
merged 2 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 104 additions & 3 deletions tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ pr:
- 'onnxruntime/core/providers/js'
name: 'orttraining_ci_$(Date:yyyyMMdd)_$(Rev:r)'

# Additional repository made available to this pipeline: pypa/manylinux,
# pinned to a fixed commit so the referenced sources do not drift.
resources:
  repositories:
  - repository: manylinux
    type: Github
    endpoint: Microsoft
    name: pypa/manylinux
    # Quoted so the commit SHA is always read as a string.
    ref: '5eda9aded5462201e6310105728d33016e637ea7'

variables:
- name: video
value: 44
Expand All @@ -22,7 +30,101 @@ variables:
value: Release

jobs:
- job: Linux_Build
# Builds the ROCm training wheel inside the manylinux_2_28 ROCm docker image.
# This job only builds (--skip_tests, unit tests OFF); ccache output is
# persisted between runs through the Cache@2 task.
- job: Linux_Build_manylinux
  variables:
    skipComponentGovernanceDetection: true
    CCACHE_DIR: $(Pipeline.Workspace)/ccache
    # Day stamp (ddMMyyyy) of the pipeline start time; part of the cache key.
    TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
  workspace:
    clean: all
  pool: onnxruntime-Ubuntu2004-AMD-CPU
  timeoutInMinutes: 120

  steps:
  - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
    displayName: 'Clean Agent Directories'
    condition: always()

  - checkout: self
    clean: true
    submodules: recursive

  # Build (or fetch) the manylinux_2_28 ROCm image used by the build step.
  # BUILD_UID is set to the agent user's uid via $(id -u).
  - template: templates/get-docker-image-steps.yml
    parameters:
      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
      Context: tools/ci_build/github/linux/docker
      DockerBuildArgs: >-
        --build-arg INSTALL_DEPS_EXTRA_ARGS=-tmur
        --network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64
        --build-arg BUILD_UID=$(id -u)
        --build-arg ROCM_VERSION=$(RocmVersion)
        --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/gcc-toolset-12/root
        --build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin:
        --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib
      Repository: onnxruntimetrainingrocm-cibuild-rocm$(RocmVersion)-manylinux-build

  # Restore the ccache directory. The exact key is day + branch + commit;
  # the restore keys fall back to same day + branch, then same day only.
  - task: Cache@2
    inputs:
      key: '"manylinux" | "$(TODAY)" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
      path: $(CCACHE_DIR)
      cacheHitVar: CACHE_RESTORED
      restoreKeys: |
        "manylinux" | "$(TODAY)" | "$(Build.SourceBranch)"
        "manylinux" | "$(TODAY)" |
    displayName: Cache Task

  # Runs only when the Cache task did not set CACHE_RESTORED to 'true', so the
  # directory bind-mounted into the container below exists in that case too.
  - script: mkdir -p $(CCACHE_DIR)
    condition: ne(variables.CACHE_RESTORED, 'true')
    displayName: Create Cache Dir

  # ROCM_HOME is exported in the host shell and the `bash -c "..."` payload is
  # double-quoted, so ${ROCM_HOME} is expanded by the host shell before docker
  # starts. Every continued line keeps a space before its trailing backslash so
  # arguments stay separated regardless of the following line's indentation.
  - task: CmdLine@2
    inputs:
      script: |-
        export ROCM_HOME=/opt/rocm
        docker run --rm \
          --ipc=host \
          --network=host \
          --cap-add=SYS_PTRACE \
          --security-opt seccomp=unconfined \
          --shm-size=1024m \
          --user $UID:$(id -g $USER) \
          -e CC=/opt/rh/gcc-toolset-12/root/usr/bin/cc \
          -e CXX=/opt/rh/gcc-toolset-12/root/usr/bin/c++ \
          -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
          -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
          -e CCACHE_DIR=/cache \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          --volume $(CCACHE_DIR):/cache \
          --workdir /onnxruntime_src \
          onnxruntimetrainingrocm-cibuild-rocm$(RocmVersion)-manylinux-build \
          /bin/bash -c "
            set -ex; \
            ccache -s; \
            /opt/python/cp38-cp38/bin/python3 tools/ci_build/build.py \
              --config $(BuildConfig) \
              --enable_training \
              --mpi_home /opt/ompi \
              --cmake_extra_defines \
                CMAKE_HIP_COMPILER=${ROCM_HOME}/llvm/bin/clang++ \
                onnxruntime_BUILD_UNIT_TESTS=OFF \
                FETCHCONTENT_TRY_FIND_PACKAGE_MODE=NEVER \
              --use_cache \
              --use_rocm \
              --rocm_version=$(RocmVersion) \
              --rocm_home ${ROCM_HOME} \
              --nccl_home ${ROCM_HOME} \
              --update \
              --build_dir /build \
              --build \
              --parallel \
              --build_wheel \
              --skip_submodule_sync \
              --skip_tests; \
            ccache -sv; \
            ccache -z"
    displayName: 'Build onnxruntime'

  - template: templates/explicitly-defined-final-tasks.yml

- job: Linux_Build_ubuntu
variables:
skipComponentGovernanceDetection: true
CCACHE_DIR: $(Pipeline.Workspace)/ccache
Expand Down Expand Up @@ -115,8 +217,7 @@ jobs:

- template: templates/explicitly-defined-final-tasks.yml


- job: Linux_Test
- job: Linux_Test_ubuntu
workspace:
clean: all
pool: AMD-GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ RUN cd /tmp/scripts && \
rm -rf /tmp/scripts


# Bake ccache into the image so this Dockerfile can also be used for CI builds.
# The v4.7.4 release tarball is streamed straight into tar; only the ccache
# binary is kept and the scratch directory is removed in the same layer.
RUN mkdir -p /tmp/ccache && \
    wget -q -O - https://github.com/ccache/ccache/releases/download/v4.7.4/ccache-4.7.4-linux-x86_64.tar.xz \
      | tar -x -J --strip-components=1 -C /tmp/ccache -f - && \
    cp /tmp/ccache/ccache /usr/bin && \
    rm -rf /tmp/ccache

# Create the build user. Both the uid and the user name can be overridden at
# image build time with --build-arg; the defaults are 1001 / onnxruntimedev.
# NOTE(review): the CI pipeline passes --build-arg BUILD_UID=$(id -u) and runs
# the container with --user $UID:..., presumably so files written to the
# bind-mounted directories are owned by the agent user — confirm.
ARG BUILD_UID=1001
ARG BUILD_USER=onnxruntimedev
RUN adduser --uid $BUILD_UID $BUILD_USER
Expand Down
Loading