Small update to GPU compiler flags to support compute capabilities 8.0 and 8.6 as well. #113

Merged
126 changes: 72 additions & 54 deletions select_compute_arch.cmake
@@ -5,9 +5,9 @@
# - "Auto" detects local machine GPU compute arch at runtime.
# - "Common" and "All" cover common and entire subsets of architectures
# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing
# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing Ampere
# NUM: Any number. Only those pairs are currently accepted by NVCC though:
# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5
# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5 8.0 8.6
# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
# Additionally, sets ${out_variable}_readable to the resulting numeric list
# Example:
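The example body itself is collapsed in this hunk. As an illustrative sketch only (ARCH_FLAGS is an arbitrary output variable name, and the architecture values are the ones this PR adds):

    CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS "Ampere" "8.6+PTX")
    list(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})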
@@ -25,70 +25,85 @@ if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
endif()

# See: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list
# Additions, deprecations, and removals can be found in the release notes:
# https://developer.nvidia.com/cuda-toolkit-archive

if(CUDA_VERSION VERSION_LESS "11.0")
# This list will be used for CUDA_ARCH_NAME = All option
set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell")

# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default)
set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5")
else()
set(CUDA_KNOWN_GPU_ARCHITECTURES "")
set(CUDA_COMMON_GPU_ARCHITECTURES "")
endif()

# as of now (Sept 2020), all CUDA versions support Maxwell GPUs
list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Maxwell")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.0")

if(CUDA_VERSION VERSION_LESS "7.0")
set(CUDA_LIMIT_GPU_ARCHITECTURE "4.9")
endif()
# The initial status here is for CUDA 7.0
set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell" "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra")
set(CUDA_COMMON_GPU_ARCHITECTURES "2.0" "2.1" "3.0" "3.5" "5.0" "5.3")
set(CUDA_LIMIT_GPU_ARCHITECTURE "6.0")
set(CUDA_ALL_GPU_ARCHITECTURES "2.0" "2.1" "3.0" "3.2" "3.5" "3.7" "5.0" "5.2" "5.3")
set(_CUDA_MAX_COMMON_ARCHITECTURE "5.2+PTX")

# This list is used to filter CUDA archs when autodetecting
set(CUDA_ALL_GPU_ARCHITECTURES "3.0" "3.2" "3.5" "5.0")

if(CUDA_VERSION VERSION_GREATER "6.9")
list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2")

if(CUDA_VERSION VERSION_LESS "8.0")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "5.9")
endif()
endif()

if(CUDA_VERSION VERSION_GREATER "7.9")
if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "6.0" "6.1" "6.2")

if(CUDA_VERSION VERSION_LESS "9.0")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.1+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "6.9")
endif()
set(_CUDA_MAX_COMMON_ARCHITECTURE "6.2+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "7.0")

list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "2.0" "2.1")
endif ()

if(CUDA_VERSION VERSION_GREATER "8.9")
if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Volta")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.0" "7.0+PTX")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.0" "7.0+PTX" "7.2" "7.2+PTX")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.0")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.0" "7.2")

if(CUDA_VERSION VERSION_LESS "10.0")
set(CUDA_LIMIT_GPU_ARCHITECTURE "7.9")
endif()
set(_CUDA_MAX_COMMON_ARCHITECTURE "7.2+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0")

list(REMOVE_ITEM CUDA_KNOWN_GPU_ARCHITECTURES "Fermi")
list(REMOVE_ITEM CUDA_ALL_GPU_ARCHITECTURES "2.0" "2.1")
endif()

if(CUDA_VERSION VERSION_GREATER "9.9")
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Turing")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.5" "7.5+PTX")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.5" "7.5+PTX")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.5")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.5")

if(CUDA_VERSION VERSION_LESS "11.0")
set(CUDA_LIMIT_GPU_ARCHITECTURE "8.9")
endif()
set(_CUDA_MAX_COMMON_ARCHITECTURE "7.5+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0")

list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "3.0")
endif()

# https://docs.nvidia.com/cuda/archive/11.0/cuda-toolkit-release-notes/index.html#cuda-general-new-features
# https://docs.nvidia.com/cuda/archive/11.0/cuda-toolkit-release-notes/index.html#deprecated-features
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ampere")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.0")

set(_CUDA_MAX_COMMON_ARCHITECTURE "8.0+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "8.6")

list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "3.5" "5.0")
list(REMOVE_ITEM CUDA_ALL_GPU_ARCHITECTURES "3.0" "3.2")
endif()

if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6")
list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.6")

set(_CUDA_MAX_COMMON_ARCHITECTURE "8.6+PTX")
set(CUDA_LIMIT_GPU_ARCHITECTURE "9.0")
endif()

list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "${_CUDA_MAX_COMMON_ARCHITECTURE}")

# Check with: cmake -DCUDA_VERSION=7.0 -P select_compute_arch.cmake
if(DEFINED CMAKE_SCRIPT_MODE_FILE)
include(CMakePrintHelpers)
cmake_print_variables(CUDA_KNOWN_GPU_ARCHITECTURES)
cmake_print_variables(CUDA_COMMON_GPU_ARCHITECTURES)
cmake_print_variables(CUDA_LIMIT_GPU_ARCHITECTURE)
cmake_print_variables(CUDA_ALL_GPU_ARCHITECTURES)
endif()
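The script-mode check documented above can also be used to verify the new branches; same invocation, just a different -DCUDA_VERSION value:

    cmake -DCUDA_VERSION=11.0 -P select_compute_arch.cmake
    cmake -DCUDA_VERSION=11.1 -P select_compute_arch.cmake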


################################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
# Usage:
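The usage lines are collapsed in this hunk; going only by the signature shown below, a minimal illustrative call (detected_archs is a hypothetical variable name) would be:

    CUDA_DETECT_INSTALLED_GPUS(detected_archs)
    message(STATUS "Detected GPU compute capabilities: ${detected_archs}")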
@@ -120,10 +135,10 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
"}\n")

if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
try_run(run_result compile_result ${PROJECT_BINARY_DIR} ${file}
try_run(run_result compile_result SOURCES ${file}
RUN_OUTPUT_VARIABLE compute_capabilities)
else()
try_run(run_result compile_result ${PROJECT_BINARY_DIR} ${file}
try_run(run_result compile_result SOURCES ${file}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
LINK_LIBRARIES ${CUDA_LIBRARIES}
RUN_OUTPUT_VARIABLE compute_capabilities)
@@ -147,7 +162,7 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
set(CUDA_GPU_DETECT_OUTPUT_FILTERED "")
separate_arguments(CUDA_GPU_DETECT_OUTPUT)
foreach(ITEM IN ITEMS ${CUDA_GPU_DETECT_OUTPUT})
if(CUDA_LIMIT_GPU_ARCHITECTURE AND ITEM VERSION_GREATER CUDA_LIMIT_GPU_ARCHITECTURE)
if(CUDA_LIMIT_GPU_ARCHITECTURE AND ITEM VERSION_GREATER_EQUAL CUDA_LIMIT_GPU_ARCHITECTURE)
list(GET CUDA_COMMON_GPU_ARCHITECTURES -1 NEWITEM)
string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${NEWITEM}")
else()
@@ -163,7 +178,7 @@ endfunction()
################################################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
# Usage:
# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
# SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
set(CUDA_ARCH_LIST "${ARGN}")

@@ -223,6 +238,9 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
elseif(${arch_name} STREQUAL "Turing")
set(arch_bin 7.5)
set(arch_ptx 7.5)
elseif(${arch_name} STREQUAL "Ampere")
set(arch_bin 8.0)
set(arch_ptx 8.0)
else()
message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS")
endif()
@@ -263,7 +281,7 @@ function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
else()
# User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
list(APPEND nvcc_flags "-gencode arch=compute_${arch},code=sm_${arch}")
list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
list(APPEND nvcc_archs_readable sm_${arch})
endif()
endforeach()
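For the capabilities this PR adds, the ARCH=CODE branch above would therefore emit flags along these lines (assuming the dots are stripped from the architecture numbers earlier in the function, which is not shown in this hunk):

    -gencode arch=compute_80,code=sm_80
    -gencode arch=compute_86,code=sm_86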