Skip to content

Commit

Permalink
merge main; add more methods to INCOMING_MODULE_JS_API
Browse files Browse the repository at this point in the history
  • Loading branch information
guschmue committed Nov 22, 2023
2 parents 16dc516 + 3f0ebd6 commit 2fcb5a8
Show file tree
Hide file tree
Showing 409 changed files with 27,539 additions and 19,554 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/stale.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ jobs:
# Override exempt-all-assignees but only to exempt the issues with an assignee to be marked as stale automatically
exempt-all-issue-assignees: true
# Used to ignore the issues and pull requests created before the start date
start-date: 20220419
# Start date should be April 19, 2022 - corresponds to the day previous stale bot stopped working
start-date: '2022-04-19T00:00:00Z'
# Number of days without activity before the actions/stale action labels an issue
days-before-issue-stale: 30
# Number of days without activity before the actions/stale action closes an issue
Expand Down
19 changes: 0 additions & 19 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,6 @@
},
// Enable Python linting and Pylance type checking
"python.analysis.typeCheckingMode": "basic",
"python.formatting.provider": "black",
"python.formatting.blackArgs": [
"--line-length",
"120"
],
"python.sortImports.args": [
"--profile",
"black",
"--line-length",
"120"
],
"python.linting.enabled": true,
"python.linting.flake8Enabled": true,
"python.linting.pylintEnabled": true,
"python.linting.pydocstyleEnabled": true,
"python.linting.pydocstyleArgs": [
"--convention=google"
],
"python.linting.banditEnabled": true,
"cpplint.lineLength": 120,
"cpplint.filters": [
"-build/include_subdir",
Expand Down
4 changes: 2 additions & 2 deletions cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "c4f6b8c6bc94ff69048492fb34df0dfaf1983933",
"commitHash": "6f47420213f757831fae65c686aa471749fa8d60",
"repositoryUrl": "https://github.com/NVIDIA/cutlass.git"
},
"comments": "cutlass"
Expand Down Expand Up @@ -316,7 +316,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "d52ec01652b7d620386251db92455968d8d90bdc",
"commitHash": "a4f72a314a85732ed67d5aa8d1088d207a7e0e61",
"repositoryUrl": "https://github.com/ROCmSoftwarePlatform/composable_kernel.git"
},
"comments": "composable_kernel"
Expand Down
18 changes: 15 additions & 3 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,7 @@ option(onnxruntime_ENABLE_LTO "Enable link time optimization" OFF)
option(onnxruntime_CROSS_COMPILING "Cross compiling onnx runtime" OFF)
option(onnxruntime_GCOV_COVERAGE "Compile with options necessary to run code coverage" OFF)
option(onnxruntime_DONT_VECTORIZE "Do not vectorize operations in Eigen" OFF)

#It's preferred to turn it OFF when onnxruntime is dynamically linked to PROTOBUF. But Tensort always required the full version of protobuf.
cmake_dependent_option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF "NOT onnxruntime_USE_TENSORRT" ON)
option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF)
option(tensorflow_C_PACKAGE_PATH "Path to tensorflow C package installation dir")
option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in language other than cpp" OFF)
option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump debug information about node inputs and outputs when executing the model." OFF)
Expand Down Expand Up @@ -526,7 +524,21 @@ if(NOT WIN32 AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
find_package(Iconv REQUIRED)
set(ICONV_LIB Iconv::Iconv)
endif()

find_package(Patch)
if (WIN32 AND NOT Patch_FOUND)
# work around CI machines missing patch from the git install by falling back to the binary in this repo.
# replicate what happens in https://github.com/Kitware/CMake/blob/master/Modules/FindPatch.cmake but without
# the hardcoded suffixes in the path to the patch binary.
find_program(Patch_EXECUTABLE NAMES patch PATHS ${PROJECT_SOURCE_DIR}/external/git.Win32.2.41.03.patch)
if(Patch_EXECUTABLE)
set(Patch_FOUND 1)
if (NOT TARGET Patch::patch)
add_executable(Patch::patch IMPORTED)
set_property(TARGET Patch::patch PROPERTY IMPORTED_LOCATION ${Patch_EXECUTABLE})
endif()
endif()
endif()
if(Patch_FOUND)
message("Patch found: ${Patch_EXECUTABLE}")
endif()
Expand Down
4 changes: 2 additions & 2 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/959002f82d7962a473d8b
re2;https://github.com/google/re2/archive/refs/tags/2022-06-01.zip;aa77313b76e91b531ee7f3e45f004c6a502a5374
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.0.0.zip;0f95b3c1fc1bd1175c4a90b2c9e39074d1bccefd
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.1.0.zip;757f90a795034a89d4f48a79d1f009f7a04c8dee
utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/d52ec01652b7d620386251db92455968d8d90bdc.zip;6b5ce8edf3625f8817086c194fbf94b664e1b0e0
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/a4f72a314a85732ed67d5aa8d1088d207a7e0e61.zip;f57357ab6d300e207a632d034ebc8aa036a090d9
1 change: 0 additions & 1 deletion cmake/external/abseil-cpp.natvis
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
<Intrinsic Name="_capacity" Expression="_commonfields().capacity_"/>
<Intrinsic Name="_control" Expression="_commonfields().control_"/>
<Intrinsic Name="_slots" Expression="(slot_type*)(_commonfields().slots_)"/>
<DisplayString Condition="_size() == 0">empty</DisplayString>
<DisplayString IncludeView="noparens">size={ _size() }</DisplayString>
<DisplayString ExcludeView="noparens">size=({_size()})</DisplayString>
<Expand>
Expand Down
3 changes: 2 additions & 1 deletion cmake/external/composable_kernel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ if(NOT composable_kernel_POPULATED)
FetchContent_Populate(composable_kernel)
set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)
# Exclude i8 device gemm instances due to excessive long compilation time and not being used
set(DTYPES fp32 fp16 bf16)
set(DTYPES fp32 fp16 bf16 fp8)
set(INSTANCES_ONLY ON)
add_subdirectory(${composable_kernel_SOURCE_DIR} ${composable_kernel_BINARY_DIR} EXCLUDE_FROM_ALL)

add_library(onnxruntime_composable_kernel_includes INTERFACE)
target_include_directories(onnxruntime_composable_kernel_includes INTERFACE
${composable_kernel_SOURCE_DIR}/include
${composable_kernel_BINARY_DIR}/include
${composable_kernel_SOURCE_DIR}/library/include)
target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__)
endif()
1 change: 0 additions & 1 deletion cmake/external/cutlass.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTIO
cutlass
URL ${DEP_URL_cutlass}
URL_HASH SHA1=${DEP_SHA1_cutlass}
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass.patch
)

FetchContent_GetProperties(cutlass)
Expand Down
21 changes: 6 additions & 15 deletions cmake/external/eigen.cmake
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@

if (onnxruntime_USE_PREINSTALLED_EIGEN)
add_library(eigen INTERFACE)
file(TO_CMAKE_PATH ${eigen_SOURCE_PATH} eigen_INCLUDE_DIRS)
target_include_directories(eigen INTERFACE ${eigen_INCLUDE_DIRS})
else ()
if (onnxruntime_USE_ACL)
FetchContent_Declare(
eigen
URL ${DEP_URL_eigen}
URL_HASH SHA1=${DEP_SHA1_eigen}
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-space-change --ignore-whitespace < ${PROJECT_SOURCE_DIR}/patches/eigen/Fix_Eigen_Build_Break.patch
)
else()
FetchContent_Declare(
eigen
URL ${DEP_URL_eigen}
URL_HASH SHA1=${DEP_SHA1_eigen}
)
endif()
FetchContent_Declare(
eigen
URL ${DEP_URL_eigen}
URL_HASH SHA1=${DEP_SHA1_eigen}
)

FetchContent_Populate(eigen)
set(eigen_INCLUDE_DIRS "${eigen_SOURCE_DIR}")
endif()
Binary file not shown.
Binary file not shown.
Binary file added cmake/external/git.Win32.2.41.03.patch/patch.exe
Binary file not shown.
1 change: 1 addition & 0 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ if(onnxruntime_USE_CUDA)
URL ${DEP_URL_microsoft_gsl}
URL_HASH SHA1=${DEP_SHA1_microsoft_gsl}
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/gsl/1064.patch
FIND_PACKAGE_ARGS 4.0 NAMES Microsoft.GSL
)
else()
FetchContent_Declare(
Expand Down
3 changes: 3 additions & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/qpostprocessor.cpp
${MLAS_SRC_DIR}/qlgavgpool.cpp
${MLAS_SRC_DIR}/qdwconv_kernelsize.cpp
${MLAS_SRC_DIR}/sqnbitgemm.cpp
)

if (NOT onnxruntime_ORT_MINIMAL_BUILD)
Expand Down Expand Up @@ -68,6 +69,7 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/qgemm_kernel_neon.cpp
${MLAS_SRC_DIR}/qgemm_kernel_udot.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sdot.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp
)

set(mlas_platform_preprocess_srcs
Expand Down Expand Up @@ -334,6 +336,7 @@ else()
${MLAS_SRC_DIR}/qgemm_kernel_neon.cpp
${MLAS_SRC_DIR}/qgemm_kernel_udot.cpp
${MLAS_SRC_DIR}/qgemm_kernel_sdot.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_neon.cpp
)
if (NOT APPLE)
set(mlas_platform_srcs
Expand Down
20 changes: 7 additions & 13 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,6 @@ configure_file(${ONNXRUNTIME_ROOT}/python/_pybind_state.py.in
${CMAKE_BINARY_DIR}/onnxruntime/capi/_pybind_state.py)

if (onnxruntime_ENABLE_TRAINING)
file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/python/deprecated/*.py"
)
file(GLOB onnxruntime_python_root_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/python/training/*.py"
)
Expand Down Expand Up @@ -419,10 +416,6 @@ if (onnxruntime_ENABLE_TRAINING)
"${ORTTRAINING_SOURCE_DIR}/python/training/onnxblock/optim/*"
)
endif()
else()
file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/training/*.py"
)
endif()

if (onnxruntime_BUILD_UNIT_TESTS)
Expand All @@ -443,6 +436,9 @@ if (onnxruntime_BUILD_UNIT_TESTS)
file(GLOB onnxruntime_python_transformers_testdata_whisper CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/python/transformers/test_data/models/whisper/*.onnx"
)
file(GLOB onnxruntime_python_transformers_testdata_conformer CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/python/transformers/test_data/models/conformer/*.onnx"
)
endif()

file(GLOB onnxruntime_python_tools_srcs CONFIGURE_DEPENDS
Expand Down Expand Up @@ -556,6 +552,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/whisper
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/eager_test
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/conformer
COMMAND ${CMAKE_COMMAND} -E copy
${ONNXRUNTIME_ROOT}/__init__.py
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/
Expand All @@ -577,9 +574,6 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different
${CMAKE_BINARY_DIR}/onnxruntime/capi/_pybind_state.py
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_capi_training_srcs}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/training/
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:onnxruntime_pybind11_state>
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
Expand Down Expand Up @@ -711,6 +705,9 @@ if (onnxruntime_BUILD_UNIT_TESTS)
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_testdata_whisper}
$<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/whisper/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_testdata_conformer}
$<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/conformer/
)
endif()

Expand Down Expand Up @@ -750,9 +747,6 @@ if (onnxruntime_ENABLE_TRAINING)
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils/data/
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils/hooks/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_capi_training_srcs}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/training/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_root_srcs}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ set(contrib_ops_excluded_files
"math/gemm_float8.cc"
"math/gemm_float8.cu"
"math/gemm_float8.h"
"moe/*"
"quantization/attention_quantization.cc"
"quantization/attention_quantization.h"
"quantization/attention_quantization_impl.cu"
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js)
set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" -s INCOMING_MODULE_JS_API=[preRun,locateFile] --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4294967296 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" -s INCOMING_MODULE_JS_API=[preRun,locateFile,arguments,onExit,wasmMemory,buffer,instantiateWasm] --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
endif()
Expand Down
47 changes: 24 additions & 23 deletions cmake/patches/composable_kernel/Fix_Clang_Build.patch
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 514b98fde..59c8a568a 100644
index b09da41a8..fca2bdf69 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.14)
@@ -19,7 +19,7 @@ endif()

set(version 1.1.0)
# Check support for CUDA/HIP in Cmake
-project(composable_kernel)
+project(composable_kernel LANGUAGES CXX HIP)
-project(composable_kernel VERSION ${version})
+project(composable_kernel VERSION ${version} LANGUAGES CXX HIP)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

@@ -94,27 +94,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -173,27 +173,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")

Expand Down Expand Up @@ -39,7 +39,7 @@ index 514b98fde..59c8a568a 100644
## HIP
find_package(HIP REQUIRED)
# Override HIP version in config.h, if necessary.
@@ -136,8 +115,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
@@ -215,8 +194,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}")
endif()
message(STATUS "Build with HIP ${HIP_VERSION}")
Expand All @@ -48,7 +48,7 @@ index 514b98fde..59c8a568a 100644

## tidy
include(EnableCompilerWarnings)
@@ -391,11 +368,3 @@ rocm_install(FILES
@@ -489,11 +466,3 @@ rocm_install(FILES

set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
Expand All @@ -61,20 +61,21 @@ index 514b98fde..59c8a568a 100644
- HEADER_ONLY
-)
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
index 1d54a141b..4edd7dbfb 100644
index a0478c9f0..1e7782cd4 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -1,7 +1,13 @@
function(add_instance_library INSTANCE_NAME)
message("adding instance ${INSTANCE_NAME}")
+ set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
+ # Always disable debug symbol and C debug assert due to
+ # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
+ # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
+ target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
target_compile_features(${INSTANCE_NAME} PUBLIC)
+ target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(${INSTANCE_NAME})
endfunction(add_instance_library INSTANCE_NAME)
@@ -44,8 +44,14 @@ function(add_instance_library INSTANCE_NAME)
endforeach()
#only continue if there are some source files left on the list
if(ARGN)
+ set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
+ # Always disable debug symbol and C debug assert due to
+ # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
+ # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
+ target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
target_compile_features(${INSTANCE_NAME} PUBLIC)
+ target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(${INSTANCE_NAME})
set(result 0)
Loading

0 comments on commit 2fcb5a8

Please sign in to comment.