Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
PeixuanZuo committed Oct 7, 2023
1 parent 3a77520 commit 37cccf1
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 24 deletions.
2 changes: 1 addition & 1 deletion cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "58817bf967d3b7b314233f6331f0430cf40651ae",
"commitHash": "04f93aadb8fca26c141f70ab19710c460b46f6c0",
"repositoryUrl": "https://github.com/ROCmSoftwarePlatform/composable_kernel.git"
},
"comments": "composable_kernel"
Expand Down
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.0.0.zip;0f95b3c1fc1bd1175c4a90b2c9e39074d1bccefd
utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/58817bf967d3b7b314233f6331f0430cf40651ae.zip;070c01f2ffdde6f515f3075b8fff3ea2ad819827
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/04f93aadb8fca26c141f70ab19710c460b46f6c0.zip;cd7083c2205b1a4fb6c1f9b7b0e597b8609f3f38
4 changes: 2 additions & 2 deletions cmake/external/composable_kernel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ if(NOT composable_kernel_POPULATED)
FetchContent_Populate(composable_kernel)
set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)
# Exclude i8 device gemm instances due to excessive long compilation time and not being used
set(DTYPES fp32 fp16 bf16)
set(DTYPES fp32 fp16 bf16 fp8)
set(INSTANCES_ONLY ON)
add_subdirectory(${composable_kernel_SOURCE_DIR} ${composable_kernel_BINARY_DIR} EXCLUDE_FROM_ALL)

Expand All @@ -21,5 +21,5 @@ if(NOT composable_kernel_POPULATED)
${composable_kernel_SOURCE_DIR}/include
${composable_kernel_BINARY_DIR}/include
${composable_kernel_SOURCE_DIR}/library/include)
target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__)
target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__ __fp8__)
endif()
63 changes: 43 additions & 20 deletions cmake/patches/composable_kernel/Fix_Clang_Build.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32defea75..8aa431e9c 100644
index b09da41a8..fca2bdf69 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
@@ -19,7 +19,7 @@ endif()

set(version 1.1.0)
# Check support for CUDA/HIP in Cmake
Expand All @@ -11,7 +11,7 @@ index 32defea75..8aa431e9c 100644

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

@@ -167,27 +167,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -173,27 +173,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")

Expand Down Expand Up @@ -39,7 +39,7 @@ index 32defea75..8aa431e9c 100644
## HIP
find_package(HIP REQUIRED)
# Override HIP version in config.h, if necessary.
@@ -209,8 +188,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
@@ -215,8 +194,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}")
endif()
message(STATUS "Build with HIP ${HIP_VERSION}")
Expand All @@ -48,7 +48,7 @@ index 32defea75..8aa431e9c 100644

## tidy
include(EnableCompilerWarnings)
@@ -483,11 +460,3 @@ rocm_install(FILES
@@ -489,11 +466,3 @@ rocm_install(FILES

set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
Expand All @@ -60,21 +60,44 @@ index 32defea75..8aa431e9c 100644
- LDCONFIG
- HEADER_ONLY
-)
diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp
index 4cba0875d..07b0adb42 100644
--- a/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp
+++ b/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp
@@ -58,7 +58,7 @@ void add_device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instances(
DeviceGemmSplitK<Row, Col, Row, F32, F32, F32, PassThrough, PassThrough, PassThrough>>>&
instances);
#endif
-#if(defined(CK_ENABLE_FP16) || defined(CK_ENABLE_FP8))
+#if(defined(CK_ENABLE_FP16) && defined(CK_ENABLE_FP8))
void add_device_gemm_xdl_splitk_f8_f16_f16_km_kn_mn_instances(
std::vector<std::unique_ptr<
DeviceGemmSplitK<Col, Row, Row, F8, F16, F16, PassThrough, PassThrough, PassThrough>>>&
@@ -182,7 +182,7 @@ struct DeviceOperationInstanceFactory<
}
}
#endif
-#if(defined(CK_ENABLE_FP16) || defined(CK_ENABLE_FP8))
+#if(defined(CK_ENABLE_FP16) && defined(CK_ENABLE_FP8))
else if constexpr(is_same_v<ADataType, f8_t> && is_same_v<BDataType, half_t> &&
is_same_v<CDataType, half_t>)
{
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
index 1d54a141b..4edd7dbfb 100644
index a0478c9f0..1e7782cd4 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -1,7 +1,13 @@
function(add_instance_library INSTANCE_NAME)
message("adding instance ${INSTANCE_NAME}")
+ set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
+ # Always disable debug symbol and C debug assert due to
+ # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
+ # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
+ target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
target_compile_features(${INSTANCE_NAME} PUBLIC)
+ target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(${INSTANCE_NAME})
endfunction(add_instance_library INSTANCE_NAME)
@@ -44,8 +44,14 @@ function(add_instance_library INSTANCE_NAME)
endforeach()
#only continue if there are some source files left on the list
if(ARGN)
+ set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
+ # Always disable debug symbol and C debug assert due to
+ # - Linker error: ... relocation truncated to fit ..., caused by object files to be linked are too huge.
+ # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
+ target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
target_compile_features(${INSTANCE_NAME} PUBLIC)
+ target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
clang_tidy_check(${INSTANCE_NAME})
set(result 0)

0 comments on commit 37cccf1

Please sign in to comment.