Skip to content

Commit

Permalink
lint/exclude rocm
Browse files Browse the repository at this point in the history
  • Loading branch information
wangyems committed Nov 6, 2023
1 parent 1d3ca92 commit d708937
Show file tree
Hide file tree
Showing 18 changed files with 1,936 additions and 2,038 deletions.
1 change: 1 addition & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ set(contrib_ops_excluded_files
"math/gemm_float8.cc"
"math/gemm_float8.cu"
"math/gemm_float8.h"
"moe/*"
"quantization/attention_quantization.cc"
"quantization/attention_quantization.h"
"quantization/attention_quantization_impl.cu"
Expand Down
60 changes: 27 additions & 33 deletions onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,56 +25,50 @@
#include "stdio.h"

Check warning on line 25 in onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h#L25

Include the directory when naming header files [build/include_subdir] [4]
Raw output
onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h:25:  Include the directory when naming header files  [build/include_subdir] [4]
#include <fstream>

Check warning on line 26 in onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h#L26

Found C++ system header after other header. Should be: common.h, c system, c++ system, other. [build/include_order] [4]
Raw output
onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h:26:  Found C++ system header after other header. Should be: common.h, c system, c++ system, other.  [build/include_order] [4]

namespace fastertransformer
{
namespace fastertransformer {

// Maps a cuBLAS status code to its symbolic name, for use in error messages.
// Returns "<unknown>" for any value not covered by the enum cases below.
static const char* _cudaGetErrorEnum(cublasStatus_t error) {
  switch (error) {
    case CUBLAS_STATUS_SUCCESS:
      return "CUBLAS_STATUS_SUCCESS";

    case CUBLAS_STATUS_NOT_INITIALIZED:
      return "CUBLAS_STATUS_NOT_INITIALIZED";

    case CUBLAS_STATUS_ALLOC_FAILED:
      return "CUBLAS_STATUS_ALLOC_FAILED";

    case CUBLAS_STATUS_INVALID_VALUE:
      return "CUBLAS_STATUS_INVALID_VALUE";

    case CUBLAS_STATUS_ARCH_MISMATCH:
      return "CUBLAS_STATUS_ARCH_MISMATCH";

    case CUBLAS_STATUS_MAPPING_ERROR:
      return "CUBLAS_STATUS_MAPPING_ERROR";

    case CUBLAS_STATUS_EXECUTION_FAILED:
      return "CUBLAS_STATUS_EXECUTION_FAILED";

    case CUBLAS_STATUS_INTERNAL_ERROR:
      return "CUBLAS_STATUS_INTERNAL_ERROR";

    case CUBLAS_STATUS_NOT_SUPPORTED:
      return "CUBLAS_STATUS_NOT_SUPPORTED";

    case CUBLAS_STATUS_LICENSE_ERROR:
      return "CUBLAS_STATUS_LICENSE_ERROR";
  }
  return "<unknown>";
}

// Overload for CUDA runtime errors: delegates to the runtime's own
// human-readable error string.
static const char* _cudaGetErrorEnum(cudaError_t error) {
  return cudaGetErrorString(error);
}

template <typename T>
void check(T result, char const *const func, const char *const file, int const line)
{
if (result)
{
void check(T result, char const* const func, const char* const file, int const line) {
if (result) {
throw std::runtime_error(std::string("[FT][ERROR] CUDA runtime error: ") +

Check warning on line 72 in onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h#L72

Add #include <string> for string [build/include_what_you_use] [4]
Raw output
onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h:72:  Add #include <string> for string  [build/include_what_you_use] [4]
(_cudaGetErrorEnum(result)) + " " + file +
":" + std::to_string(line) + " \n");
Expand All @@ -83,4 +77,4 @@ void check(T result, char const *const func, const char *const file, int const l

#define check_cuda_error(val) fastertransformer::check((val), #val, __FILE__, __LINE__)

} // namespace fastertransformer
} // namespace fastertransformer
40 changes: 19 additions & 21 deletions onnxruntime/contrib_ops/cuda/moe/ft_moe/compute_occupancy.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,28 @@

namespace fastertransformer {

template<typename GemmKernel>
inline int compute_occupancy_for_kernel()
{

int smem_size = int(sizeof(typename GemmKernel::SharedStorage));

if (smem_size > (48 << 10)) {
cudaError_t status =
cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
if (status == cudaError::cudaErrorInvalidValue) {
// Clear the error bit since we can ignore this.
// This should mean that smem_size > cudaDevAttrMaxSharedMemoryPerBlockOptin. In that case, we return an
// occupancy of 0. This will cause the heuristic to ignore this configuration.
status = cudaGetLastError();
return 0;
}
check_cuda_error(status);
// Computes the maximum number of thread blocks of the given CUTLASS
// GemmKernel that can be resident per SM, given the kernel's static
// shared-memory footprint. Returns 0 when the required shared memory
// exceeds what the device allows, so the tiling heuristic will skip
// this configuration.
template <typename GemmKernel>
inline int compute_occupancy_for_kernel() {
  // static_cast (not a C-style functional cast) per cpplint readability/casting.
  int smem_size = static_cast<int>(sizeof(typename GemmKernel::SharedStorage));

  if (smem_size > (48 << 10)) {
    // Kernels needing more than 48 KB of dynamic shared memory must opt in
    // explicitly via cudaFuncSetAttribute.
    cudaError_t status =
        cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
    if (status == cudaError::cudaErrorInvalidValue) {
      // Clear the error bit since we can ignore this.
      // This should mean that smem_size > cudaDevAttrMaxSharedMemoryPerBlockOptin. In that case, we return an
      // occupancy of 0. This will cause the heuristic to ignore this configuration.
      status = cudaGetLastError();
      return 0;
    }
    check_cuda_error(status);
  }

  int max_active_blocks = -1;
  check_cuda_error(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
      &max_active_blocks, cutlass::Kernel<GemmKernel>, GemmKernel::kThreadCount, smem_size));

  return max_active_blocks;
}

} // namespace fastertransformer
Loading

0 comments on commit d708937

Please sign in to comment.