Skip to content

Commit

Permalink
lint/exclude rocm
Browse files Browse the repository at this point in the history
  • Loading branch information
wangyems committed Nov 6, 2023
1 parent 1d3ca92 commit d708937
Show file tree
Hide file tree
Showing 18 changed files with 1,936 additions and 2,038 deletions.
1 change: 1 addition & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ set(contrib_ops_excluded_files
"math/gemm_float8.cc"
"math/gemm_float8.cu"
"math/gemm_float8.h"
"moe/*"
"quantization/attention_quantization.cc"
"quantization/attention_quantization.h"
"quantization/attention_quantization_impl.cu"
Expand Down
60 changes: 27 additions & 33 deletions onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,56 +25,50 @@
#include "stdio.h"

Check warning on line 25 in onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h#L25

Include the directory when naming header files [build/include_subdir] [4]
Raw output
onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h:25:  Include the directory when naming header files  [build/include_subdir] [4]
#include <fstream>

Check warning on line 26 in onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h#L26

Found C++ system header after other header. Should be: common.h, c system, c++ system, other. [build/include_order] [4]
Raw output
onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h:26:  Found C++ system header after other header. Should be: common.h, c system, c++ system, other.  [build/include_order] [4]

namespace fastertransformer
{
namespace fastertransformer {

// Maps a cuBLAS status code to its symbolic name, for use in error messages.
// Returns "<unknown>" for any value not covered by the enum cases below.
static const char* _cudaGetErrorEnum(cublasStatus_t error) {
  switch (error) {
    case CUBLAS_STATUS_SUCCESS:
      return "CUBLAS_STATUS_SUCCESS";

    case CUBLAS_STATUS_NOT_INITIALIZED:
      return "CUBLAS_STATUS_NOT_INITIALIZED";

    case CUBLAS_STATUS_ALLOC_FAILED:
      return "CUBLAS_STATUS_ALLOC_FAILED";

    case CUBLAS_STATUS_INVALID_VALUE:
      return "CUBLAS_STATUS_INVALID_VALUE";

    case CUBLAS_STATUS_ARCH_MISMATCH:
      return "CUBLAS_STATUS_ARCH_MISMATCH";

    case CUBLAS_STATUS_MAPPING_ERROR:
      return "CUBLAS_STATUS_MAPPING_ERROR";

    case CUBLAS_STATUS_EXECUTION_FAILED:
      return "CUBLAS_STATUS_EXECUTION_FAILED";

    case CUBLAS_STATUS_INTERNAL_ERROR:
      return "CUBLAS_STATUS_INTERNAL_ERROR";

    case CUBLAS_STATUS_NOT_SUPPORTED:
      return "CUBLAS_STATUS_NOT_SUPPORTED";

    case CUBLAS_STATUS_LICENSE_ERROR:
      return "CUBLAS_STATUS_LICENSE_ERROR";
  }
  return "<unknown>";
}

// Overload for CUDA runtime errors: delegates to the runtime's own
// human-readable error string.
static const char* _cudaGetErrorEnum(cudaError_t error) {
  return cudaGetErrorString(error);
}

template <typename T>
void check(T result, char const *const func, const char *const file, int const line)
{
if (result)
{
void check(T result, char const* const func, const char* const file, int const line) {
if (result) {
throw std::runtime_error(std::string("[FT][ERROR] CUDA runtime error: ") +

Check warning on line 72 in onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h#L72

Add #include <string> for string [build/include_what_you_use] [4]
Raw output
onnxruntime/contrib_ops/cuda/moe/ft_moe/common.h:72:  Add #include <string> for string  [build/include_what_you_use] [4]
(_cudaGetErrorEnum(result)) + " " + file +
":" + std::to_string(line) + " \n");
Expand All @@ -83,4 +77,4 @@ void check(T result, char const *const func, const char *const file, int const l

#define check_cuda_error(val) fastertransformer::check((val), #val, __FILE__, __LINE__)

} // namespace fastertransformer
} // namespace fastertransformer
40 changes: 19 additions & 21 deletions onnxruntime/contrib_ops/cuda/moe/ft_moe/compute_occupancy.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,28 @@

namespace fastertransformer {

template<typename GemmKernel>
inline int compute_occupancy_for_kernel()
{

int smem_size = int(sizeof(typename GemmKernel::SharedStorage));

if (smem_size > (48 << 10)) {
cudaError_t status =
cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
if (status == cudaError::cudaErrorInvalidValue) {
// Clear the error bit since we can ignore this.
// This should mean that smem_size > cudaDevAttrMaxSharedMemoryPerBlockOptin. In that case, we return an
// occupancy of 0. This will cause the heuristic to ignore this configuration.
status = cudaGetLastError();
return 0;
}
check_cuda_error(status);
// Computes the maximum number of thread blocks of the given CUTLASS
// GemmKernel that can be resident per SM, given the kernel's static
// shared-memory footprint. Returns 0 when the required shared memory
// exceeds what the device allows, so the tiling heuristic will skip
// this configuration.
template <typename GemmKernel>
inline int compute_occupancy_for_kernel() {
  // static_cast (not a C-style functional cast) per cpplint readability/casting.
  int smem_size = static_cast<int>(sizeof(typename GemmKernel::SharedStorage));

  if (smem_size > (48 << 10)) {
    // Kernels needing more than 48 KB of dynamic shared memory must opt in
    // explicitly via cudaFuncSetAttribute.
    cudaError_t status =
        cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
    if (status == cudaError::cudaErrorInvalidValue) {
      // Clear the error bit since we can ignore this.
      // This should mean that smem_size > cudaDevAttrMaxSharedMemoryPerBlockOptin. In that case, we return an
      // occupancy of 0. This will cause the heuristic to ignore this configuration.
      status = cudaGetLastError();
      return 0;
    }
    check_cuda_error(status);
  }

  int max_active_blocks = -1;
  check_cuda_error(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
      &max_active_blocks, cutlass::Kernel<GemmKernel>, GemmKernel::kThreadCount, smem_size));

  return max_active_blocks;
}

} // namespace fastertransformer
Loading

0 comments on commit d708937

Please sign in to comment.