From 4b1f3f4292d1be72e04cc3804ef7e39230f83f59 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 14 Nov 2023 09:30:44 -0800 Subject: [PATCH] CMake: AMReX_SYCL_PARALLEL_LINK_JOBS Add AMReX_SYCL_PARALLEL_LINK_JOBS option to the CMake build system. Adjust SYCL's RelWithDebInfo mode to the recommendation of Intel VTune. This is a follow-up on #3498 that made similar changes to GNU Make. --- .github/workflows/intel.yml | 12 +++++--- Docs/sphinx_documentation/source/GPU.rst | 36 +++++++++++++----------- Tools/CMake/AMReXFlagsTargets.cmake | 2 +- Tools/CMake/AMReXOptions.cmake | 10 +++++++ Tools/CMake/AMReXSYCL.cmake | 6 ++++ 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 6133e666fad..a37f1b7c029 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -44,7 +44,8 @@ jobs: -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which icpx) \ -DCMAKE_Fortran_COMPILER=$(which ifx) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_SYCL_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s @@ -86,7 +87,8 @@ jobs: -DAMReX_GPU_BACKEND=SYCL \ -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which icpx) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_SYCL_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s @@ -136,7 +138,8 @@ jobs: -DAMReX_GPU_BACKEND=SYCL \ -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_SYCL_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s @@ -186,7 +189,8 @@ jobs: -DAMReX_SYCL_SUB_GROUP_SIZE=64 \ -DCMAKE_C_COMPILER=$(which icx) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DAMReX_SYCL_PARALLEL_LINK_JOBS=2 cmake --build build --parallel 2 ccache -s diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index aff060e9166..c09120e6a7d 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -217,7 +217,7 @@ variables to configure the build +------------------------------+-------------------------------------------------+-------------+-----------------+ | SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 | +------------------------------+-------------------------------------------------+-------------+-----------------+ - | SYCL_MAX_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. | + | SYCL_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. | +------------------------------+-------------------------------------------------+-------------+-----------------+ .. raw:: latex @@ -428,22 +428,24 @@ Below is an example configuration for SYCL: .. table:: AMReX SYCL-specific build options - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | Variable Name | Description | Default | Possible values | - +==============================+=================================================+=============+=================+ - | AMReX_SYCL_AOT | Enable SYCL ahead-of-time compilation | NO | YES, NO | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, | - | | | | AutoLarge | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | YES | YES, NO | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO | - +------------------------------+-------------------------------------------------+-------------+-----------------+ - | AMReX_SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 | - +------------------------------+-------------------------------------------------+-------------+-----------------+ + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | Variable Name | Description | Default | Possible values | + +===============================+=================================================+=============+==================+ + | AMReX_SYCL_AOT | Enable SYCL ahead-of-time compilation | NO | YES, NO | + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | AMReX_SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, | + | | | | AutoLarge | + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. | + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | AMReX_SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | YES | YES, NO | + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | AMReX_SYCL_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO | + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | AMReX_SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 | + +-------------------------------+-------------------------------------------------+-------------+------------------+ + | AMReX_SYCL_PARALLEL_LINK_JOBS | Specify the number of parallel link jobs | 1 | positive integer | + +-------------------------------+-------------------------------------------------+-------------+------------------+ .. raw:: latex \end{center} diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake index 9e3073cd53f..a2e86b2fbd3 100644 --- a/Tools/CMake/AMReXFlagsTargets.cmake +++ b/Tools/CMake/AMReXFlagsTargets.cmake @@ -89,7 +89,7 @@ target_compile_options( Flags_CXX $<${_cxx_appleclang_rwdbg}:> $<${_cxx_appleclang_rel}:> $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> - $<${_cxx_intelllvm_rwdbg}:-g1> + $<${_cxx_intelllvm_rwdbg}:-gline-tables-only -fdebug-info-for-profiling> # recommended by Intel VTune $<${_cxx_intelllvm_rel}:> ) diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake index 80196639984..be7270528b2 100644 --- a/Tools/CMake/AMReXOptions.cmake +++ b/Tools/CMake/AMReXOptions.cmake @@ -213,6 +213,16 @@ if (AMReX_SYCL) endif() endif() + set(AMReX_SYCL_PARALLEL_LINK_JOBS_DEFAULT 1) + if (DEFINED ENV{AMREX_SYCL_PARALLEL_LINK_JOBS}) + set(AMReX_SYCL_PARALLEL_LINK_JOBS_DEFAULT "$ENV{AMREX_SYCL_PARALLEL_LINK_JOBS}") + endif() + set(AMReX_SYCL_PARALLEL_LINK_JOBS ${AMReX_SYCL_PARALLEL_LINK_JOBS_DEFAULT} + CACHE STRING "SYCL max parallel link jobs") + if (NOT AMReX_SYCL_PARALLEL_LINK_JOBS GREATER_EQUAL 1 OR + NOT AMReX_SYCL_PARALLEL_LINK_JOBS MATCHES "^[1-9][0-9]*$") + message(FATAL_ERROR "AMReX_SYCL_PARALLEL_LINK_JOBS (${AMReX_SYCL_PARALLEL_LINK_JOBS}) must be an integer >= 1") + endif() endif () # --- HIP ---- diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index a67571dc412..a2f9c40b2be 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ b/Tools/CMake/AMReXSYCL.cmake @@ -88,4 +88,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND "${CMAKE_BUILD_TYPE}" MATCHES "Debug" "$<${_cxx_sycl}:-fsycl-link-huge-device-code>" ) endif () +if (AMReX_SYCL_PARALLEL_LINK_JOBS GREATER 1) + target_link_options( SYCL + INTERFACE + $<${_cxx_sycl}:-fsycl-max-parallel-link-jobs=${AMReX_SYCL_PARALLEL_LINK_JOBS}>) +endif() + unset(_cxx_sycl)