Skip to content

Commit

Permalink
[ROCm] Update ROCm and MIGraphX CI to ROCm5.7 (microsoft#17834)
Browse files Browse the repository at this point in the history
- Update ROCm and MIGraphX CI to ROCm5.7
- Simplify test exclude file. Some tests will output `registered
execution providers ROCMExecutionProvider were unable to run the model.`
if they cannot run.
- Add `enable_training` build argument for MIGraphX pipeline.
  • Loading branch information
PeixuanZuo authored and kleiti committed Mar 22, 2024
1 parent 861bb9d commit 0f23af5
Show file tree
Hide file tree
Showing 8 changed files with 13 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ variables:
- name: render
value: 109
- name: RocmVersion
value: 5.6
value: 5.7

jobs:
- job: Linux_Build
Expand Down Expand Up @@ -99,6 +99,7 @@ jobs:
ccache -s; \
python tools/ci_build/build.py \
--config Release \
--enable_training \
--cmake_extra_defines \
CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
onnxruntime_BUILD_KERNEL_EXPLORER=OFF \
Expand Down Expand Up @@ -181,7 +182,7 @@ jobs:
/bin/bash -c "
set -ex; \
cd /build/Release && xargs -a /build/Release/perms.txt chmod a+x; \
bash /onnxruntime_src/tools/ci_build/github/pai/migraphx_test_launcher.sh"
bash /onnxruntime_src/tools/ci_build/github/pai/pai_test_launcher.sh"
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run onnxruntime unit tests'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ variables:
- name: render
value: 109
- name: RocmVersion
value: 5.6
value: 5.7
- name: BuildConfig
value: Release

Expand Down Expand Up @@ -98,7 +98,7 @@ jobs:
/bin/bash -c "
set -ex; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 tools/ci_build/build.py \
/opt/python/cp39-cp39/bin/python3 tools/ci_build/build.py \
--config $(BuildConfig) \
--enable_training \
--mpi_home /opt/ompi \
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
FROM ubuntu:22.04

ARG ROCM_VERSION=5.6
ARG ROCM_VERSION=5.7
ARG AMDGPU_VERSION=${ROCM_VERSION}
ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'

Expand Down Expand Up @@ -71,12 +71,15 @@ RUN pip install cryptography==41.0.0
# Create migraphx-ci environment
ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci
ENV CONDA_DEFAULT_ENV migraphx-ci
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}

# Enable migraphx-ci environment
SHELL ["conda", "run", "-n", "migraphx-ci", "/bin/bash", "-c"]

# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found
RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6

# Install migraphx
RUN apt update && apt install -y migraphx

Expand Down
2 changes: 0 additions & 2 deletions tools/ci_build/github/pai/migraphx-excluded-tests.txt

This file was deleted.

15 changes: 0 additions & 15 deletions tools/ci_build/github/pai/migraphx_test_launcher.sh

This file was deleted.

23 changes: 0 additions & 23 deletions tools/ci_build/github/pai/pai-excluded-tests.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
CudaKernelTest.NegativeLogLikelihoodLoss_TinySizeTensor
CudaKernelTest.NegativeLogLikelihoodLoss_SmallSizeTensor
CudaKernelTest.NegativeLogLikelihoodLoss_MediumSizeTensor
CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16
CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo
CudaKernelTest.SoftmaxGrad_LargeTensor_AllAxis_Float16
Expand All @@ -10,26 +7,6 @@ CudaKernelTest.LogSoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo
CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16
CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16_NoPowerOfTwo
ReductionOpTest.ReductionVariationTest
ReductionOpTest.ReduceLogSumExp_default_axes_keepdims_double
ReductionOpTest.ReduceLogSumExp_default_axes_do_not_keep_dims_double
ReductionOpTest.ReduceLogSumExp_do_not_keepdims_double
ReductionOpTest.ReduceLogSumExp_do_not_keepdims_2_double
ReductionOpTest.ReduceLogSumExp_keepdims_double
ReductionOpTest.ReduceLogSumExp_double
ReductionOpTest.ReduceMax_double
ReductionOpTest.ReduceMean_default_axes_keepdims_double
ReductionOpTest.ReduceMean_default_axes_do_not_keep_dims_double
ReductionOpTest.ReduceMean_do_not_keepdims_double
ReductionOpTest.ReduceMean_do_not_keepdims_2_double
ReductionOpTest.ReduceMean_keepdims_double
ReductionOpTest.ReduceMean_double
ReductionOpTest.ReduceMean0DTensor_double
ReductionOpTest.ReduceMin_double
ReductionOpTest.ReduceSum_double
ReductionOpTest.ReduceSumSquare_double
ReductionOpTest.ReduceInfMax_double
ReductionOpTest.ReduceInfMin_double
ReductionOpTest.ReduceInfLogSumExp_double
GatherOpTest.Gather_invalid_index_cpu
Scatter.InvalidIndex
GradientCheckerTest.AddGrad
Expand Down
6 changes: 3 additions & 3 deletions tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
FROM ubuntu:22.04

ARG ROCM_VERSION=5.6
ARG ROCM_VERSION=5.7
ARG AMDGPU_VERSION=${ROCM_VERSION}
ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'

Expand Down Expand Up @@ -64,7 +64,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
# Create rocm-ci environment
ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci
ENV CONDA_DEFAULT_ENV rocm-ci
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}

# Conda base patch
Expand All @@ -77,7 +77,7 @@ SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"]
RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6

# Install Pytorch
RUN pip install install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
RUN pip install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
pip install torch-ort --no-dependencies


Expand Down

0 comments on commit 0f23af5

Please sign in to comment.