diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 9d1b39143016b..1070627d5e7da 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -291,12 +291,50 @@ if (onnxruntime_USE_ROCM) message(FATAL_ERROR "ROCM does not support build with CUDA!") endif() + # replicate strategy used by pytorch to get ROCM_VERSION + # https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake + # with modification + if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version") + message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version ****\n") + file(READ "${onnxruntime_ROCM_HOME}/.info/version" ROCM_VERSION_DEV_RAW) + string(REGEX MATCH "^([0-9]+)\.([0-9]+)\.([0-9]+)-.*$" ROCM_VERSION_MATCH ${ROCM_VERSION_DEV_RAW}) + elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h") + message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/include/rocm_version.h ****\n") + file(READ "${onnxruntime_ROCM_HOME}/include/rocm_version.h" ROCM_VERSION_H_RAW) + string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW}) + elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h") + message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h ****\n") + file(READ "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h" ROCM_VERSION_H_RAW) + string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW}) + endif() + + if (ROCM_VERSION_MATCH) + set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1}) + set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2}) + set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3}) + set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}") + math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}") + + message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}") + message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}") + message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}") + message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}") + message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}") + else() + message(FATAL_ERROR "Cannot determine ROCm version string") + endif() + + if (NOT CMAKE_HIP_COMPILER) set(CMAKE_HIP_COMPILER "${onnxruntime_ROCM_HOME}/llvm/bin/clang++") endif() if (NOT CMAKE_HIP_ARCHITECTURES) - set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201") + if (ROCM_VERSION_DEV VERSION_LESS "6.2") + message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2") + else() + set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201") + endif() endif() file(GLOB rocm_cmake_components ${onnxruntime_ROCM_HOME}/lib/cmake/*) @@ -328,35 +366,6 @@ if (onnxruntime_USE_ROCM) set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl) endif() - # replicate strategy used by pytorch to get ROCM_VERSION - # https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake - # with modification - if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version") - file(READ "${onnxruntime_ROCM_HOME}/.info/version" ROCM_VERSION_DEV_RAW) - string(REGEX MATCH "^([0-9]+)\.([0-9]+)\.([0-9]+)-.*$" ROCM_VERSION_MATCH ${ROCM_VERSION_DEV_RAW}) - elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h") - file(READ "${onnxruntime_ROCM_HOME}/include/rocm_version.h" ROCM_VERSION_H_RAW) - string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW}) - elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h") - file(READ "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h" ROCM_VERSION_H_RAW) - string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW}) - endif() - - if (ROCM_VERSION_MATCH) - set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1}) - set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2}) - set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3}) - set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}") - math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}") - else() - message(FATAL_ERROR "Cannot determine ROCm version string") - endif() - message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version ****\n") - message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}") - message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}") - message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}") - message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}") - message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}") message("\n***** HIP LANGUAGE CONFIG INFO ****\n") message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}") message("CMAKE_HIP_ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}") diff --git a/dockerfiles/Dockerfile.migraphx b/dockerfiles/Dockerfile.migraphx index c3541a8bd3425..c5d998d503899 100644 --- a/dockerfiles/Dockerfile.migraphx +++ b/dockerfiles/Dockerfile.migraphx @@ -5,7 +5,7 @@ # Dockerfile to run ONNXRuntime with MIGraphX integration #-------------------------------------------------------------------------- -FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1 +FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime ARG ONNXRUNTIME_BRANCH=main diff --git a/dockerfiles/Dockerfile.rocm b/dockerfiles/Dockerfile.rocm index c242933f677f0..bef8d7a5f47d2 100644 --- a/dockerfiles/Dockerfile.rocm +++ b/dockerfiles/Dockerfile.rocm @@ -5,7 +5,7 @@ # Dockerfile to run ONNXRuntime with ROCm integration #-------------------------------------------------------------------------- -FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1 +FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime ARG ONNXRUNTIME_BRANCH=main diff --git a/dockerfiles/README.md b/dockerfiles/README.md index 7825940571769..9f83fc390eee7 100644 --- a/dockerfiles/README.md +++ b/dockerfiles/README.md @@ -292,7 +292,7 @@ Nothing else from ONNX Runtime source tree will be copied/installed to the image Note: When running the container you built in Docker, please either use 'nvidia-docker' command instead of 'docker', or use Docker command-line options to make sure NVIDIA runtime will be used and appropriate files mounted from host. Otherwise, CUDA libraries won't be found. You can also [set NVIDIA runtime as default in Docker](https://github.com/dusty-nv/jetson-containers#docker-default-runtime). ## MIGraphX -**Ubuntu 20.04, ROCm6.0, MIGraphX** +**Ubuntu 22.04, ROCm6.2.3, MIGraphX** 1. Build the docker image from the Dockerfile in this repository. ``` @@ -306,7 +306,7 @@ Note: When running the container you built in Docker, please either use 'nvidia- ``` ## ROCm -**Ubuntu 20.04, ROCm6.0** +**Ubuntu 22.04, ROCm6.2.3** 1. Build the docker image from the Dockerfile in this repository. ``` diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc index 67fb35d26e6dc..559b521f18782 100644 --- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc +++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc @@ -159,7 +159,7 @@ TEST(InternalTestingEP, PreventSaveOfModelWithCompiledOps) { // the internal NHWC operators are only included as part of contrib ops currently. as the EP requests the NHWC // version of the ONNX operator when matching a static kernel, those are required. -#if !defined(DISABLE_CONTRIB_OPS) +#if !defined(DISABLE_CONTRIB_OPS) && !defined(USE_ROCM) TEST(InternalTestingEP, TestMixOfStaticAndCompiledKernels) { const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "transform/fusion/conv_relu_opset12.onnx"; @@ -256,10 +256,6 @@ TEST(InternalTestingEP, TestNhwcConversionOfStaticKernels) { run_test(ort_model_path); } -// This test can be deprecated now as the code logic has been changed so the model is not applicable -// TEST(InternalTestingEP, TestRegisterAllocatorHandlesUsageInMultipleSessions) { -//} - // make sure allocators returned by SessionState::GetAllocator are valid when IExecutionProvider::ReplaceAllocator // is used. if something is off InferenceSession::Initialize will fail. TEST(InternalTestingEP, TestReplaceAllocatorDoesntBreakDueToLocalAllocatorStorage) { diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml index 1cf60b47b4ded..9e2d8e49a2292 100644 --- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml @@ -37,9 +37,7 @@ variables: - name: render value: 109 - name: RocmVersion - value: 6.1 - - name: RocmVersionPatchSuffix - value: ".3" + value: 6.2.3 jobs: - job: Linux_Build @@ -66,7 +64,7 @@ jobs: parameters: Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)" + DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)" Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) - task: Cache@2 @@ -165,7 +163,7 @@ jobs: parameters: Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)" + DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)" Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) - task: CmdLine@2 diff --git a/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml index 50f3862761320..c730cc2548038 100644 --- a/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml @@ -37,9 +37,7 @@ variables: - name: render value: 109 - name: RocmVersion - value: 6.1 - - name: RocmVersionPatchSuffix - value: ".3" + value: 6.1.3 jobs: - job: Linux_Build @@ -66,7 +64,7 @@ jobs: parameters: Dockerfile: tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)" + DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)" Repository: onnxruntimerocm-cibuild-rocm$(RocmVersion) - task: Cache@2 @@ -166,7 +164,7 @@ jobs: parameters: Dockerfile: tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile Context: tools/ci_build/github/linux/docker - DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)" + DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)" Repository: onnxruntimerocm-cibuild-rocm$(RocmVersion) - task: CmdLine@2 @@ -231,7 +229,11 @@ jobs: -e KERNEL_EXPLORER_TEST_USE_CUPY=1 \ -e CUPY_CACHE_DIR=/build/Release \ onnxruntimerocm-cibuild-rocm$(RocmVersion) \ - pytest /onnxruntime_src/onnxruntime/python/tools/kernel_explorer/ -n 4 --reruns 1 --durations=100 + /bin/bash -c " + set -ex; \ + python --version; \ + ls /opt/miniconda/envs/rocm-ci/lib/; \ + pytest /onnxruntime_src/onnxruntime/python/tools/kernel_explorer/ -n 4 --reruns 1 --durations=100" workingDirectory: $(Build.SourcesDirectory) displayName: 'Run kernel explorer tests' condition: succeededOrFailed() diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm index f63f508852fc2..e4c3af05053ba 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm @@ -6,7 +6,7 @@ ARG LD_LIBRARY_PATH_ARG=${DEVTOOLSET_ROOTPATH}/usr/lib64:${DEVTOOLSET_ROOTPATH}/ ARG PREPEND_PATH=${DEVTOOLSET_ROOTPATH}/usr/bin: FROM $BASEIMAGE AS base_image -ARG ROCM_VERSION=5.5 +ARG ROCM_VERSION=6.2.3 #Add our own dependencies ADD scripts /tmp/scripts diff --git a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile index 98ea5e119c319..51591e11ea2e9 100644 --- a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile @@ -1,7 +1,7 @@ # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete FROM ubuntu:22.04 -ARG ROCM_VERSION=6.0 +ARG ROCM_VERSION=6.2.3 ARG AMDGPU_VERSION=${ROCM_VERSION} ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' @@ -68,7 +68,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 # Create migraphx-ci environment ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci ENV CONDA_DEFAULT_ENV migraphx-ci -RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9 +RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.10 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH} # Enable migraphx-ci environment @@ -80,4 +80,4 @@ RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bi # Install migraphx RUN apt update && apt install -y migraphx -RUN pip install numpy packaging ml_dtypes==0.3.0 +RUN pip install numpy packaging ml_dtypes==0.5.0 diff --git a/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile index 749e222aff499..f74c5c7b0295e 100644 --- a/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile @@ -1,7 +1,7 @@ # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete FROM ubuntu:22.04 -ARG ROCM_VERSION=6.0 +ARG ROCM_VERSION=6.1.3 ARG AMDGPU_VERSION=${ROCM_VERSION} ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' @@ -67,26 +67,30 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 # Create rocm-ci environment ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci ENV CONDA_DEFAULT_ENV rocm-ci -RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9 +RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.10 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH} # Enable rocm-ci environment SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"] -# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found +# Some DLLs in the conda environment have conflict with the one installed in Ubuntu system. +# For example, the GCC version in the conda environment is 12.x, while the one in the Ubuntu 22.04 is 11.x. +# ln -sf to make sure we always use libstdc++.so.6 and libgcc_s.so.1 in the system. RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6 +RUN ln -sf /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libgcc_s.so.1 RUN pip install packaging \ - ml_dtypes==0.3.0 \ + ml_dtypes==0.5.0 \ pytest==7.4.4 \ pytest-xdist \ pytest-rerunfailures \ - scipy==1.10.0 \ - numpy==1.24.1 + scipy==1.14.1 \ + numpy==1.26.4 RUN apt install -y git # Install Cupy to decrease CPU utilization +# Note that the version of Cupy requires numpy < 1.27 RUN git clone https://github.com/ROCm/cupy && cd cupy && \ git checkout 432a8683351d681e00903640489cb2f4055d2e09 && \ export CUPY_INSTALL_USE_HIP=1 && \ diff --git a/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh b/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh index 269337bbba042..0be64d96f3a34 100755 --- a/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh +++ b/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh @@ -2,7 +2,7 @@ set -e -x # version -ROCM_VERSION=6.0 +ROCM_VERSION=6.2.3 while getopts "r:" parameter_Option do case "${parameter_Option}"