From 53a3c46950221310cee34660b75bc67f55730faa Mon Sep 17 00:00:00 2001 From: Johannes M Dieterich Date: Mon, 12 Nov 2018 14:32:31 -0800 Subject: [PATCH] Switch to packaged Thrust on Ubuntu, enable CentOS 7.5 as a CI target (#12899) Summary: 1) Use the hip-thrust version of Thrust as opposed to the GH master. (ROCm 267) 2) CentOS 7.5 docker (ROCm 279) * Always install the libraries at docker creation for ubuntu. * Add Dockerfile for CentOS ROCm * Enable the centos build * Source devtoolset in bashrc * Set locales correctly depending on whether we are on Ubuntu or CentOS * Install a newer cmake for CentOS * Checkout thrust as there is no package for CentOS yet. PyTorch/Caffe2 on ROCm passed tests: https://github.com/ROCmSoftwarePlatform/pytorch/pull/280 For attention: bddppq ezyang A Docker rebuild for Ubuntu is not urgent (getting rid of the Thrust checkout and package install is mainly cosmetic). If a Docker image for CentOS 7.5 is wanted, a build is necessary. I tested the PyTorch build in the CentOS docker. PyTorch unit tests mostly work; however, a test in test_jit causes a Python recursion error that seems to be due to the python2 on CentOS, as we have never seen this on Ubuntu - hence please do not enable the unit tests for CentOS. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/12899 Differential Revision: D13029424 Pulled By: bddppq fbshipit-source-id: 1ca8f4337ec6a603f2742fc81046d5b8f8717c76 --- .jenkins/caffe2/build.sh | 18 ----- .jenkins/caffe2/test.sh | 14 ---- .jenkins/pytorch/build.sh | 16 ---- .jenkins/pytorch/enabled-configs.txt | 1 + aten/src/ATen/native/cuda/ReduceOpsKernel.cu | 13 ++++ caffe2/CMakeLists.txt | 17 ++-- cmake/Dependencies.cmake | 47 +++++------ cmake/public/LoadHIP.cmake | 11 ++- cmake/public/utils.cmake | 18 +---- docker/caffe2/jenkins/build.sh | 8 ++ docker/caffe2/jenkins/centos-rocm/.gitignore | 1 + docker/caffe2/jenkins/centos-rocm/Dockerfile | 56 ++++++++++++++ .../jenkins/common/install_devtoolset.sh | 10 +++ docker/caffe2/jenkins/common/install_rocm.sh | 77 +++++++++++++++---- docker/caffe2/jenkins/ubuntu-rocm/Dockerfile | 3 +- setup.py | 2 + tools/amd_build/build_pytorch_amd.py | 18 ----- .../pyHIPIFY/cuda_to_hip_mappings.py | 5 +- 18 files changed, 200 insertions(+), 135 deletions(-) create mode 100644 docker/caffe2/jenkins/centos-rocm/.gitignore create mode 100644 docker/caffe2/jenkins/centos-rocm/Dockerfile create mode 100755 docker/caffe2/jenkins/common/install_devtoolset.sh diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh index 38ab321f1ddf93..8eb23eaa29064c 100755 --- a/.jenkins/caffe2/build.sh +++ b/.jenkins/caffe2/build.sh @@ -4,7 +4,6 @@ set -ex pip install --user --no-cache-dir hypothesis==3.59.0 - # The INSTALL_PREFIX here must match up with test.sh INSTALL_PREFIX="/usr/local/caffe2" LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) @@ -154,23 +153,6 @@ if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then # This is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip CMAKE_ARGS+=("-USE_LMDB=ON") - # TODO: This is patching the official FindHip to properly handly - # cmake generator expression. 
A PR is opened in the upstream repo here: - # https://github.com/ROCm-Developer-Tools/HIP/pull/516 - # remove this hack once it's merged. - if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then - sudo sed -i 's/\ -I${dir}/\ $<$:-I${dir}>/' /opt/rocm/hip/cmake/FindHIP.cmake - fi - - export LANG=C.UTF-8 - export LC_ALL=C.UTF-8 - export HCC_AMDGPU_TARGET=gfx900 - - # The link time of libcaffe2_hip.so takes 40 minutes, according to - # https://github.com/RadeonOpenCompute/hcc#thinlto-phase-1---implemented - # using using ThinLTO could significantly improve link-time performance. - export KMTHINLTO=1 - ########## HIPIFY Caffe2 operators ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py" ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py" diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh index 687e10bc0c29ba..2a04660e287517 100755 --- a/.jenkins/caffe2/test.sh +++ b/.jenkins/caffe2/test.sh @@ -49,20 +49,6 @@ fi mkdir -p $TEST_DIR/{cpp,python} -if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then - export LANG=C.UTF-8 - export LC_ALL=C.UTF-8 - - # Pin individual runs to specific gpu so that we can schedule - # multiple jobs on machines that have multi-gpu. - NUM_AMD_GPUS=$(/opt/rocm/bin/rocminfo | grep 'Device Type.*GPU' | wc -l) - if (( $NUM_AMD_GPUS == 0 )); then - echo >&2 "No AMD GPU detected!" - exit 1 - fi - export HIP_VISIBLE_DEVICES=$(($BUILD_NUMBER % $NUM_AMD_GPUS)) -fi - cd "${WORKSPACE}" # C++ tests diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index f91a7170dcb62d..e4381f4b261cc9 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -43,22 +43,6 @@ cmake --version pip install -q -r requirements.txt || true if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then - # This is necessary in order to cross compile (or else we'll have missing GPU device). - export HCC_AMDGPU_TARGET=gfx900 - - # These environment variables are not set on CI when we were running as the Jenkins user. 
- # The HIP Utility scripts require these environment variables to be set in order to run without error. - export LANG=C.UTF-8 - export LC_ALL=C.UTF-8 - - # This environment variable enabled HCC Optimizations that speed up the linking stage. - # https://github.com/RadeonOpenCompute/hcc#hcc-with-thinlto-linking - export KMTHINLTO=1 - - # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime - sudo apt-get -qq install libc++1 - sudo apt-get -qq install libc++abi1 - # When hcc runs out of memory, it silently exits without stopping # the build process, leaving undefined symbols in the shared lib # which will cause undefined symbol errors when later running diff --git a/.jenkins/pytorch/enabled-configs.txt b/.jenkins/pytorch/enabled-configs.txt index fc27bed120bd79..b11269e9660c12 100644 --- a/.jenkins/pytorch/enabled-configs.txt +++ b/.jenkins/pytorch/enabled-configs.txt @@ -44,6 +44,7 @@ short-perf-test-cpu short-perf-test-gpu py2-clang7-rocmdeb-ubuntu16.04-build py2-clang7-rocmdeb-ubuntu16.04-test +py2-devtoolset7-rocmrpm-centos7.5-build pytorch-ppc64le-cuda9.2-cudnn7-py3-build pytorch-ppc64le-cuda9.2-cudnn7-py3-test pytorch-ppc64le-cuda9.1-cudnn7-py3-build diff --git a/aten/src/ATen/native/cuda/ReduceOpsKernel.cu b/aten/src/ATen/native/cuda/ReduceOpsKernel.cu index de26f8245ab023..6a1b00845fcc07 100644 --- a/aten/src/ATen/native/cuda/ReduceOpsKernel.cu +++ b/aten/src/ATen/native/cuda/ReduceOpsKernel.cu @@ -18,6 +18,19 @@ void sum_kernel_impl(TensorIterator& iter) { }); } +#ifdef __HIPCC__ +template <> +void sum_kernel_impl(TensorIterator& iter) { + // There is a Register Coalescing bug in LLVM causing the hcc + // compiler segfaults: + // https://bugs.llvm.org/show_bug.cgi?id=39602 + // To work around it, use int32 as the accumulate type. 
+ gpu_reduce_kernel(iter, []GPU_LAMBDA(int32_t a, int32_t b) -> int32_t { + return a + b; + }); +} +#endif + template void prod_kernel_impl(TensorIterator& iter) { gpu_reduce_kernel(iter, []GPU_LAMBDA(acc_t a, acc_t b) -> acc_t { diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 0a5447ad1c730f..07507f970c124e 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -382,7 +382,7 @@ if(USE_ROCM) hip_add_library(caffe2_hip ${Caffe2_HIP_SRCS}) # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added. - target_compile_options(caffe2_hip PRIVATE ${HIP_HIPCC_FLAGS}) + target_compile_options(caffe2_hip PRIVATE ${HIP_HCC_FLAGS}) target_link_libraries(caffe2_hip PUBLIC caffe2) target_link_libraries(caffe2_hip PUBLIC ${Caffe2_HIP_DEPENDENCY_LIBS}) @@ -393,9 +393,6 @@ if(USE_ROCM) # Set standard properties on the target torch_set_target_props(caffe2_hip) - # When a library has object files that contain device code, it needs to use hipcc/hcc to link. 
- set_target_properties(caffe2_hip PROPERTIES LINKER_LANGUAGE HIP) - caffe2_interface_library(caffe2_hip caffe2_hip_library) list(APPEND Caffe2_MAIN_LIBS caffe2_hip_library) install(TARGETS caffe2_hip EXPORT Caffe2Targets DESTINATION lib) @@ -441,10 +438,11 @@ if (BUILD_TEST) foreach(test_src ${Caffe2_HIP_TEST_SRCS}) set_source_files_properties(${test_src} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) get_filename_component(test_name ${test_src} NAME_WE) - hip_add_executable(${test_name} "${test_src}") + add_executable(${test_name} "${test_src}") target_link_libraries(${test_name} ${Caffe2_MAIN_LIBS} gtest_main) target_include_directories(${test_name} PRIVATE $) - target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDES}) + target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS}) add_test(NAME ${test_name} COMMAND $) if (INSTALL_TEST) install(TARGETS ${test_name} DESTINATION test) @@ -563,16 +561,15 @@ if (BUILD_PYTHON) endif() if(USE_ROCM) - hip_add_library(caffe2_pybind11_state_hip MODULE ${Caffe2_HIP_PYTHON_SRCS}) - set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINKER_LANGUAGE HIP) - target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_HIPCC_FLAGS} -fvisibility=hidden) + add_library(caffe2_pybind11_state_hip MODULE ${Caffe2_HIP_PYTHON_SRCS}) + target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_CXX_FLAGS} -fvisibility=hidden) set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "") set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) if (APPLE) set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") endif() target_include_directories(caffe2_pybind11_state_hip PRIVATE $) - target_include_directories(caffe2_pybind11_state_hip PRIVATE ${Caffe2_CPU_INCLUDE}) + target_include_directories(caffe2_pybind11_state_hip PRIVATE ${Caffe2_CPU_INCLUDE} 
${Caffe2_HIP_INCLUDES}) target_link_libraries( caffe2_pybind11_state_hip caffe2_library caffe2_hip_library) if (WIN32) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 69efffd6b7868a..67edfb6761a842 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -673,21 +673,27 @@ if(NOT BUILD_ATEN_MOBILE) message(INFO "Compiling with HIP for AMD.") caffe2_update_option(USE_ROCM ON) - list(APPEND HIP_HIPCC_FLAGS -fPIC) - list(APPEND HIP_HIPCC_FLAGS -D__HIP_PLATFORM_HCC__=1) - list(APPEND HIP_HIPCC_FLAGS -DCUDA_HAS_FP16=1) - list(APPEND HIP_HIPCC_FLAGS -D__HIP_NO_HALF_OPERATORS__=1) - list(APPEND HIP_HIPCC_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1) - list(APPEND HIP_HIPCC_FLAGS -DHIP_VERSION=${HIP_VERSION_MAJOR}) - list(APPEND HIP_HIPCC_FLAGS -Wno-macro-redefined) - list(APPEND HIP_HIPCC_FLAGS -Wno-inconsistent-missing-override) - list(APPEND HIP_HIPCC_FLAGS -Wno-exceptions) - list(APPEND HIP_HIPCC_FLAGS -Wno-shift-count-negative) - list(APPEND HIP_HIPCC_FLAGS -Wno-shift-count-overflow) - list(APPEND HIP_HIPCC_FLAGS -Wno-unused-command-line-argument) - list(APPEND HIP_HIPCC_FLAGS -Wno-duplicate-decl-specifier) - list(APPEND HIP_HIPCC_FLAGS -DCAFFE2_USE_MIOPEN) - list(APPEND HIP_HIPCC_FLAGS -DROCBLAS_FP16=0) + list(APPEND HIP_CXX_FLAGS -fPIC) + list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1) + list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1) + list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_OPERATORS__=1) + list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1) + list(APPEND HIP_CXX_FLAGS -DHIP_VERSION=${HIP_VERSION_MAJOR}) + list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined) + list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override) + list(APPEND HIP_CXX_FLAGS -Wno-exceptions) + list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative) + list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow) + list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument) + list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) + list(APPEND HIP_CXX_FLAGS 
-DCAFFE2_USE_MIOPEN) + list(APPEND HIP_CXX_FLAGS -DROCBLAS_FP16=0) + + set(HIP_HCC_FLAGS ${HIP_CXX_FLAGS}) + # Ask hcc to generate device code during compilation so we can use + # host linker to link. + list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc) + list(APPEND HIP_HCC_FLAGS -amdgpu-target=${HCC_AMDGPU_TARGET}) set(Caffe2_HIP_INCLUDES ${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${thrust_INCLUDE_DIRS} $ ${Caffe2_HIP_INCLUDES}) @@ -726,17 +732,6 @@ if(USE_ROCM) include_directories(SYSTEM ${HIPRAND_PATH}/include) include_directories(SYSTEM ${ROCRAND_PATH}/include) include_directories(SYSTEM ${THRUST_PATH}) - - # load HIP cmake module and load platform id - EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig -P OUTPUT_VARIABLE PLATFORM) - EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) - - # Link with HIPCC https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md#linking-with-hipcc - # SET(CMAKE_CXX_LINK_EXECUTABLE ${HIP_HIPCC_EXECUTABLE}) - - # Show message that we're using ROCm. 
- MESSAGE(STATUS "ROCM TRUE:") - MESSAGE(STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER}) endif() # ---[ NCCL diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index f50e1ff760f606..4541386fd14820 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -62,11 +62,8 @@ ENDIF() # THRUST_PATH IF(DEFINED ENV{THRUST_PATH}) SET(THRUST_PATH $ENV{THRUST_PATH}) -ELSEIF(DEFINED ENV{THRUST_ROOT}) - # TODO: Remove support of THRUST_ROOT environment variable - SET(THRUST_PATH $ENV{THRUST_ROOT}) ELSE() - SET(THRUST_PATH ${ROCM_PATH}/Thrust) + SET(THRUST_PATH ${ROCM_PATH}/include) ENDIF() # HIPRAND_PATH @@ -97,6 +94,12 @@ ELSE() SET(MIOPEN_PATH $ENV{MIOPEN_PATH}) ENDIF() +IF(NOT DEFINED ENV{HCC_AMDGPU_TARGET}) + SET(HCC_AMDGPU_TARGET gfx900) +ELSE() + SET(HCC_AMDGPU_TARGET $ENV{HCC_AMDGPU_TARGET}) +ENDIF() + # Add HIP to the CMAKE Module Path set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH}) diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake index ef9c9032c442d6..326a23d9af70f9 100644 --- a/cmake/public/utils.cmake +++ b/cmake/public/utils.cmake @@ -113,25 +113,15 @@ function(caffe2_binary_target target_name_or_src) endfunction() function(caffe2_hip_binary_target target_name_or_src) + caffe2_binary_target(${target_name_or_src}) + if (ARGC GREATER 1) set(__target ${target_name_or_src}) - prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${ARGN}") else() get_filename_component(__target ${target_name_or_src} NAME_WE) - prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${target_name_or_src}") endif() - - # These two lines are the only differences between - # caffe2_hip_binary_target and caffe2_binary_target - set_source_files_properties(${__srcs} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - hip_add_executable(${__target} ${__srcs}) - - target_link_libraries(${__target} ${Caffe2_MAIN_LIBS}) - # If we have Caffe2_MODULES defined, we will also link with the modules. 
- if (DEFINED Caffe2_MODULES) - target_link_libraries(${__target} ${Caffe2_MODULES}) - endif() - install(TARGETS ${__target} DESTINATION bin) + target_compile_options(${__target} PRIVATE ${HIP_CXX_FLAGS}) + target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDES}) endfunction() ############################################################################## diff --git a/docker/caffe2/jenkins/build.sh b/docker/caffe2/jenkins/build.sh index 357a79549da887..9c7fa48e673f7a 100755 --- a/docker/caffe2/jenkins/build.sh +++ b/docker/caffe2/jenkins/build.sh @@ -39,6 +39,8 @@ fi if [[ "$image" == *rocm* ]]; then ROCM_VERSION="$(echo "${image}" | perl -n -e'/rocm(\d+\.\d+\.\d+|nightly)/ && print $1')" DOCKERFILE="${OS}-rocm/Dockerfile" + # newer cmake version needed + CMAKE_VERSION=3.6.3 fi if [[ "$image" == *conda* ]]; then @@ -66,6 +68,11 @@ if [[ "$image" == *-clang* ]]; then CLANG_VERSION="$(echo "${image}" | perl -n -e'/clang(\d+(\.\d+)?)/ && print $1')" fi + +if [[ "$image" == *-devtoolset* ]]; then + DEVTOOLSET_VERSION="$(echo "${image}" | perl -n -e'/devtoolset(\d+(\.\d+)?)/ && print $1')" +fi + # Copy over common scripts to directory containing the Dockerfile to build cp -a common/* "$(dirname ${DOCKERFILE})" @@ -84,6 +91,7 @@ docker build \ --build-arg "JENKINS_GID=${JENKINS_GID:-}" \ --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \ --build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \ + --build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \ --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \ --build-arg "ANACONDA_VERSION=${ANACONDA_VERSION}" \ --build-arg "CUDA_VERSION=${CUDA_VERSION}" \ diff --git a/docker/caffe2/jenkins/centos-rocm/.gitignore b/docker/caffe2/jenkins/centos-rocm/.gitignore new file mode 100644 index 00000000000000..c97f963b3da62e --- /dev/null +++ b/docker/caffe2/jenkins/centos-rocm/.gitignore @@ -0,0 +1 @@ +*.sh diff --git a/docker/caffe2/jenkins/centos-rocm/Dockerfile b/docker/caffe2/jenkins/centos-rocm/Dockerfile new file mode 100644 index 
00000000000000..4b62208658ee08 --- /dev/null +++ b/docker/caffe2/jenkins/centos-rocm/Dockerfile @@ -0,0 +1,56 @@ +ARG CENTOS_VERSION +FROM centos:${CENTOS_VERSION} + +# Install required packages to build Caffe2 +ARG EC2 +ADD ./install_base.sh install_base.sh +RUN bash ./install_base.sh && rm install_base.sh + +# Install devtoolset +ARG DEVTOOLSET_VERSION +ADD ./install_devtoolset.sh install_devtoolset.sh +RUN bash ./install_devtoolset.sh +RUN rm install_devtoolset.sh +ENV BASH_ENV "/etc/profile" + +# Install rocm +ARG ROCM_VERSION +ADD ./install_rocm.sh install_rocm.sh +RUN bash ./install_rocm.sh +RUN rm install_rocm.sh +ENV PATH /opt/rocm/bin:$PATH +ENV PATH /opt/rocm/hcc/bin:$PATH +ENV PATH /opt/rocm/hip/bin:$PATH +ENV PATH /opt/rocm/opencl/bin:$PATH +ENV MIOPEN_DISABLE_CACHE 1 +ENV HIP_PLATFORM hcc +ENV LC_ALL en_US.utf8 +ENV LANG en_US.utf8 + +# Install non-default CMake version +ARG CMAKE_VERSION +ADD ./install_cmake.sh install_cmake.sh +RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi +RUN rm install_cmake.sh + +# Compile/install ccache for faster builds +ADD ./install_ccache.sh install_ccache.sh +RUN bash ./install_ccache.sh && rm install_ccache.sh + +# Install Python +ARG PYTHON_VERSION +ADD ./install_python.sh install_python.sh +RUN if [ -n "${PYTHON_VERSION}" ]; then bash ./install_python.sh; fi +RUN rm install_python.sh + +# (optional) Add Jenkins user +ARG JENKINS +ARG JENKINS_UID +ARG JENKINS_GID +ADD ./add_jenkins_user.sh add_jenkins_user.sh +RUN if [ -n "${JENKINS}" ]; then bash ./add_jenkins_user.sh; fi +RUN rm add_jenkins_user.sh + +# Include BUILD_ENVIRONMENT environment variable in image +ARG BUILD_ENVIRONMENT +ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT} diff --git a/docker/caffe2/jenkins/common/install_devtoolset.sh b/docker/caffe2/jenkins/common/install_devtoolset.sh new file mode 100755 index 00000000000000..bdae6375981383 --- /dev/null +++ b/docker/caffe2/jenkins/common/install_devtoolset.sh @@ -0,0 +1,10 @@ +#!/bin/bash + 
+set -ex + +[ -n "$DEVTOOLSET_VERSION" ] + +yum install -y centos-release-scl +yum install -y devtoolset-$DEVTOOLSET_VERSION + +echo "source scl_source enable devtoolset-$DEVTOOLSET_VERSION" > "/etc/profile.d/devtoolset-$DEVTOOLSET_VERSION.sh" diff --git a/docker/caffe2/jenkins/common/install_rocm.sh b/docker/caffe2/jenkins/common/install_rocm.sh index b97443a270360b..ea779d7566a61e 100644 --- a/docker/caffe2/jenkins/common/install_rocm.sh +++ b/docker/caffe2/jenkins/common/install_rocm.sh @@ -7,6 +7,10 @@ install_ubuntu() { apt-get install -y wget apt-get install -y libopenblas-dev + # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime (-y: this script runs unattended during docker build, so apt-get must not prompt) + apt-get install -y libc++1 + apt-get install -y libc++abi1 + DEB_ROCM_REPO=http://repo.radeon.com/rocm/misc/facebook/apt/.apt_1.9.white_rabbit/debian # Add rocm repository wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - @@ -28,6 +32,15 @@ install_ubuntu() { rocrand \ hip-thrust + pushd /tmp + wget https://github.com/scchan/hcc/releases/download/19-host_linker_relative_path_rocdl/rocm19wb_20181109.tgz + tar -xzf rocm19wb_20181109.tgz + pushd rocm19wb_20181109/deb + apt install -y ./hcc-1.2.18445-Linux.deb ./hip_base-1.5.18435.deb ./hip_hcc-1.5.18435.deb ./hip_doc-1.5.18435.deb ./hip_samples-1.5.18435.deb + popd + rm -rf rocm19wb_20181109.tgz rocm19wb_20181109 + popd + # HIP has a bug that drops DEBUG symbols in generated MakeFiles. 
# https://github.com/ROCm-Developer-Tools/HIP/pull/588 if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then @@ -42,19 +55,59 @@ install_ubuntu() { } install_centos() { - echo "Not implemented yet" - exit 1 + + yum update -y + yum install -y wget + yum install -y openblas-devel + + yum install -y epel-release + yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r` + + echo "[ROCm]" > /etc/yum.repos.d/rocm.repo + echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo + echo "baseurl=http://repo.radeon.com/rocm/misc/facebook/yum/.yum_1.9.white_rabbit/" >> /etc/yum.repos.d/rocm.repo + echo "enabled=1" >> /etc/yum.repos.d/rocm.repo + echo "gpgcheck=0" >> /etc/yum.repos.d/rocm.repo + + yum update -y + + yum install -y \ + rocm-dev \ + rocm-libs \ + rocm-utils \ + rocfft \ + miopen-hip \ + miopengemm \ + rocblas \ + rocm-profiler \ + cxlactivitylogger \ + rocsparse \ + hipsparse \ + rocrand + + + pushd /tmp + wget https://github.com/scchan/hcc/releases/download/19-host_linker_relative_path_rocdl/rocm19wb_20181109.tgz + tar -xzf rocm19wb_20181109.tgz + pushd rocm19wb_20181109/rpm + rpm -i --replacefiles hcc-1.2.18445-Linux.rpm hip_base-1.5.18435.rpm hip_hcc-1.5.18435.rpm hip_doc-1.5.18435.rpm hip_samples-1.5.18435.rpm + popd + rm -rf rocm19wb_20181109.tgz rocm19wb_20181109 + popd + + # Cleanup + yum clean all + rm -rf /var/cache/yum + rm -rf /var/lib/yum/yumdb + rm -rf /var/lib/yum/history + + # Needed for now, will be replaced once hip-thrust is packaged for CentOS + git clone --recursive https://github.com/ROCmSoftwarePlatform/Thrust.git /data/Thrust + rm -rf /data/Thrust/thrust/system/cuda/detail/cub-hip + git clone --recursive https://github.com/ROCmSoftwarePlatform/cub-hip.git /data/Thrust/thrust/system/cuda/detail/cub-hip + ln -s /data/Thrust/thrust /opt/rocm/include/thrust } -install_hip_thrust() { - # Needed for now, will be replaced soon - # We are now (redundantly) installing the Thrust package into another location (/opt/rocm/include/thrust) which we will 
- # switch over to - git clone --recursive https://github.com/ROCmSoftwarePlatform/Thrust.git /data/Thrust - rm -rf /data/Thrust/thrust/system/cuda/detail/cub-hip - git clone --recursive https://github.com/ROCmSoftwarePlatform/cub-hip.git /data/Thrust/thrust/system/cuda/detail/cub-hip -} - # Install Python packages depending on the base OS if [ -f /etc/lsb-release ]; then install_ubuntu @@ -64,5 +117,3 @@ else echo "Unable to determine OS..." exit 1 fi - -install_hip_thrust diff --git a/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile b/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile index 5825dda0f89132..d15373c72a6324 100644 --- a/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile +++ b/docker/caffe2/jenkins/ubuntu-rocm/Dockerfile @@ -60,9 +60,10 @@ ENV PATH /opt/rocm/bin:$PATH ENV PATH /opt/rocm/hcc/bin:$PATH ENV PATH /opt/rocm/hip/bin:$PATH ENV PATH /opt/rocm/opencl/bin:$PATH -ENV THRUST_ROOT /data/Thrust ENV MIOPEN_DISABLE_CACHE 1 ENV HIP_PLATFORM hcc +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 # Compile/install ccache for faster builds ADD ./install_ccache.sh install_ccache.sh diff --git a/setup.py b/setup.py index f601033036ebd5..c6247ee6f53eb9 100644 --- a/setup.py +++ b/setup.py @@ -1021,6 +1021,7 @@ def run(self): rocfft_include_path = '/opt/rocm/rocfft/include' hiprand_include_path = '/opt/rocm/hiprand/include' rocrand_include_path = '/opt/rocm/rocrand/include' + thrust_include_path = '/opt/rocm/include/' hip_lib_path = '/opt/rocm/hip/lib' hcc_lib_path = '/opt/rocm/hcc/lib' include_dirs.append(rocm_include_path) @@ -1030,6 +1031,7 @@ def run(self): include_dirs.append(hipsparse_include_path) include_dirs.append(hiprand_include_path) include_dirs.append(rocrand_include_path) + include_dirs.append(thrust_include_path) include_dirs.append(tmp_install_path + "/include/THCUNN") extra_link_args.append('-L' + hip_lib_path) extra_link_args.append('-Wl,-rpath,' + hip_lib_path) diff --git a/tools/amd_build/build_pytorch_amd.py b/tools/amd_build/build_pytorch_amd.py index 
eb09baaa7862b9..07fdd684534ad0 100644 --- a/tools/amd_build/build_pytorch_amd.py +++ b/tools/amd_build/build_pytorch_amd.py @@ -27,24 +27,6 @@ for filename in os.listdir(os.path.join(amd_build_dir, "patches")): subprocess.Popen(["git", "apply", os.path.join(patch_folder, filename)], cwd=proj_dir) -# HIPCC Compiler doesn't provide host defines - Automatically include them. -for root, _, files in os.walk(os.path.join(proj_dir, "aten/src/ATen")): - for filename in files: - if filename.endswith(".cu") or filename.endswith(".cuh"): - filepath = os.path.join(root, filename) - - # Add the include header! - with open(filepath, "r+") as f: - txt = f.read() - result = '#include "hip/hip_runtime.h"\n%s' % txt - f.seek(0) - f.write(result) - f.truncate() - f.flush() - - # Flush to disk - os.fsync(f) - # Make various replacements inside AMD_BUILD/torch directory ignore_files = ["csrc/autograd/profiler.h", "csrc/autograd/profiler.cpp", "csrc/cuda/cuda_check.h"] diff --git a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py index dab4f5b01c39e8..91dbee2599eaef 100644 --- a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py +++ b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py @@ -241,7 +241,10 @@ ]) CUDA_INCLUDE_MAP = collections.OrderedDict([ - ("cuda.h", ("hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER)), + # since pytorch uses "\b{pattern}\b" as the actual re pattern, + # patterns listed here have to begin and end with alnum chars + ("include