Commit: lint
wangyems committed Mar 22, 2024
1 parent abec9b5 commit 379dffa
Showing 24 changed files with 3,353 additions and 3,891 deletions.
onnxruntime/contrib_ops/cuda/moe/cutlass_extensions/compute_occupancy.h

@@ -24,40 +24,41 @@ using namespace onnxruntime;
 
 namespace ort_fastertransformer {
 
-template <typename GemmKernel, bool enable_cutlass_3x = false> inline int compute_occupancy_for_kernel() {
-    int smem_size = int(sizeof(typename GemmKernel::SharedStorage));
-
-    if (smem_size > (48 << 10)) {
-        cudaFuncAttributes attr;
-        int device = 0;
-        int max_smem_per_block = 0;
-        CUDA_CALL_THROW(cudaGetDevice(&device));
-        CUDA_CALL_THROW(cudaDeviceGetAttribute(&max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device));
-        if constexpr (enable_cutlass_3x) {
-            CUDA_CALL_THROW(cudaFuncGetAttributes(&attr, cutlass::device_kernel<GemmKernel>));
-        } else {
-            CUDA_CALL_THROW(cudaFuncGetAttributes(&attr, cutlass::Kernel<GemmKernel>));
-        }
-        if (smem_size + attr.sharedSizeBytes >= static_cast<size_t>(max_smem_per_block)) {
-            // This should mean that
-            // cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)
-            // wouldn't work. In that case, we return an occupancy of 0. This will cause the heuristic to ignore this
-            // configuration.
-            return 0;
-        }
-    }
-
-    int max_active_blocks = -1;
-    if constexpr (enable_cutlass_3x) {
-        CUDA_CALL_THROW(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
-            &max_active_blocks, cutlass::device_kernel<GemmKernel>,
-            128 * (GemmKernel::NumLoadWarpGroups + GemmKernel::NumMmaWarpGroups), smem_size));
-    } else {
-        CUDA_CALL_THROW(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_active_blocks, cutlass::Kernel<GemmKernel>,
-                                                                      GemmKernel::kThreadCount, smem_size));
-    }
-
-    return max_active_blocks;
-}
-
-} // namespace ort_fastertransformer
+template <typename GemmKernel, bool enable_cutlass_3x = false>
+inline int compute_occupancy_for_kernel() {
+  int smem_size = int(sizeof(typename GemmKernel::SharedStorage));
+
+  if (smem_size > (48 << 10)) {
+    cudaFuncAttributes attr;
+    int device = 0;
+    int max_smem_per_block = 0;
+    CUDA_CALL_THROW(cudaGetDevice(&device));
+    CUDA_CALL_THROW(cudaDeviceGetAttribute(&max_smem_per_block, cudaDevAttrMaxSharedMemoryPerBlockOptin, device));
+    if constexpr (enable_cutlass_3x) {
+      CUDA_CALL_THROW(cudaFuncGetAttributes(&attr, cutlass::device_kernel<GemmKernel>));
+    } else {
+      CUDA_CALL_THROW(cudaFuncGetAttributes(&attr, cutlass::Kernel<GemmKernel>));
+    }
+    if (smem_size + attr.sharedSizeBytes >= static_cast<size_t>(max_smem_per_block)) {
+      // This should mean that
+      // cudaFuncSetAttribute(cutlass::Kernel<GemmKernel>, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)
+      // wouldn't work. In that case, we return an occupancy of 0. This will cause the heuristic to ignore this
+      // configuration.
+      return 0;
+    }
+  }
+
+  int max_active_blocks = -1;
+  if constexpr (enable_cutlass_3x) {
+    CUDA_CALL_THROW(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+        &max_active_blocks, cutlass::device_kernel<GemmKernel>,
+        128 * (GemmKernel::NumLoadWarpGroups + GemmKernel::NumMmaWarpGroups), smem_size));
+  } else {
+    CUDA_CALL_THROW(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_active_blocks, cutlass::Kernel<GemmKernel>,
+                                                                  GemmKernel::kThreadCount, smem_size));
+  }
+
+  return max_active_blocks;
+}
+
+}  // namespace ort_fastertransformer

[cpplint] reported by reviewdog, new line 29: Using deprecated casting style. Use static_cast<int>(...) instead [readability/casting] [4]
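A note on the early return 0 and the 48 KB threshold in this function: kernels whose dynamic shared memory demand exceeds the default 48 KB per-block limit must opt in via cudaFuncSetAttribute, and when even the opt-in ceiling (cudaDevAttrMaxSharedMemoryPerBlockOptin, minus the kernel's static attr.sharedSizeBytes) is too small, the configuration is unusable, so the heuristic reports zero occupancy. Below is a minimal standalone sketch of the same pattern; the kernel name my_kernel, the 256-thread block, and the 64 KB figure are illustrative only, not from this commit.

#include <cstdio>
#include <cuda_runtime.h>

// Illustrative kernel; not part of the commit.
__global__ void my_kernel(float* out) {
  extern __shared__ float smem[];  // dynamic shared memory
  smem[threadIdx.x] = static_cast<float>(threadIdx.x);
  __syncthreads();
  out[threadIdx.x] = smem[threadIdx.x];
}

int main() {
  constexpr int kThreadCount = 256;
  int smem_size = 64 << 10;  // 64 KB, above the default 48 KB per-block limit

  // Configurations above 48 KB must opt in first. This is the call the comment
  // in compute_occupancy_for_kernel() says "wouldn't work" when static plus
  // dynamic shared memory exceeds cudaDevAttrMaxSharedMemoryPerBlockOptin.
  if (cudaFuncSetAttribute(my_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize,
                           smem_size) != cudaSuccess) {
    std::printf("opt-in failed; the heuristic would treat occupancy as 0\n");
    return 0;
  }

  // Same occupancy query the helper issues for the cutlass 2.x path.
  int max_active_blocks = -1;
  cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_active_blocks, my_kernel,
                                                kThreadCount, smem_size);
  std::printf("max active blocks per SM: %d\n", max_active_blocks);
  return 0;
}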
onnxruntime/contrib_ops/cuda/moe/cutlass_extensions/epilogue/thread/fused_activations.h

@@ -46,60 +46,50 @@
 
 /////////////////////////////////////////////////////////////////////////////////////////////////
 
-namespace cutlass
-{
-namespace epilogue
-{
-namespace thread
-{
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-__forceinline__ __device__ float copysignf_pos(float a, float b)
-{
-    float r;
-    r = __int_as_float(__float_as_int(a) | (__float_as_int(b) & 0x80000000));
-    return r;
-}
-
-__forceinline__ __device__ float tanh_opt(float x)
-{
-#if (__CUDACC_VER_MAJOR__ < 11) || (__CUDA_ARCH__ < 750)
-    float const exp_val = -1.f * fabs(2 * x);
-    return copysignf_pos((1.0f - __expf(exp_val)) / (__expf(exp_val) + 1.0f), x);
-#else
-    return fast_tanh(x);
-#endif
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-template <>
-struct GELU_taylor<float>
-{
-    static bool const kIsHeavy = true;
-
-    CUTLASS_DEVICE
-    float operator()(float const& z) const
-    {
-        float k0 = float(0.7978845608028654);
-        float k1 = float(0.044715);
-
-        return float(cutlass::constants::half<float>() * z
-            * (cutlass::constants::one<float>() + tanh_opt(k0 * z * (cutlass::constants::one<float>() + k1 * z * z))));
-    }
-
-    using Params = LinearCombinationGenericParams<float>;
-
-    CUTLASS_DEVICE
-    float operator()(float const& scalar, Params const& params_) const
-    {
-        return this->operator()(scalar);
-    }
-};
-
-} // namespace thread
-} // namespace epilogue
-} // namespace cutlass
+namespace cutlass {
+namespace epilogue {
+namespace thread {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+__forceinline__ __device__ float copysignf_pos(float a, float b) {
+  float r;
+  r = __int_as_float(__float_as_int(a) | (__float_as_int(b) & 0x80000000));
+  return r;
+}
+
+__forceinline__ __device__ float tanh_opt(float x) {
+#if (__CUDACC_VER_MAJOR__ < 11) || (__CUDA_ARCH__ < 750)
+  float const exp_val = -1.f * fabs(2 * x);
+  return copysignf_pos((1.0f - __expf(exp_val)) / (__expf(exp_val) + 1.0f), x);
+#else
+  return fast_tanh(x);
+#endif
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+template <>
+struct GELU_taylor<float> {
+  static bool const kIsHeavy = true;
+
+  CUTLASS_DEVICE
+  float operator()(float const& z) const {
+    float k0 = float(0.7978845608028654);
+    float k1 = float(0.044715);
+    return float(cutlass::constants::half<float>() * z * (cutlass::constants::one<float>() + tanh_opt(k0 * z * (cutlass::constants::one<float>() + k1 * z * z))));
+  }
+
+  using Params = LinearCombinationGenericParams<float>;
+
+  CUTLASS_DEVICE
+  float operator()(float const& scalar, Params const& params_) const {
+    return this->operator()(scalar);
+  }
+};
+
+}  // namespace thread
+}  // namespace epilogue
+}  // namespace cutlass
 
 /////////////////////////////////////////////////////////////////////////////////////////////////

[cpplint] reported by reviewdog on the new file:
  line 77: Using deprecated casting style. Use static_cast<float>(...) instead [readability/casting] [4]
  line 78: Using deprecated casting style. Use static_cast<float>(...) instead [readability/casting] [4]
  line 80: Lines should be <= 120 characters long [whitespace/line_length] [2]
  line 80: Using deprecated casting style. Use static_cast<float>(...) instead [readability/casting] [4]
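For reference, the two constants flagged by cpplint above encode the standard tanh approximation of GELU, and the pre-CUDA-11 / pre-sm_75 branch of tanh_opt evaluates tanh through __expf using the exponential identity below (copysignf_pos, which assumes a nonnegative first argument, ORs the sign bit of x back into the result). In LaTeX:

\mathrm{GELU}(z) \approx \frac{z}{2}\left(1 + \tanh\!\bigl(k_0\, z\,(1 + k_1 z^2)\bigr)\right),
\qquad k_0 = \sqrt{2/\pi} \approx 0.7978845608028654, \quad k_1 = 0.044715

\tanh(x) = \operatorname{sign}(x)\,\frac{1 - e^{-2\lvert x\rvert}}{1 + e^{-2\lvert x\rvert}}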
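As a quick numerical sanity check of those constants, here is a small host-only sketch (illustrative, not part of the commit; the helper names gelu_exact and gelu_tanh are hypothetical) comparing the tanh form used by GELU_taylor<float> against the exact erf-based GELU. Over [-6, 6] the two agree to within roughly 1e-3.

#include <cmath>
#include <cstdio>

// Exact GELU: 0.5 * z * (1 + erf(z / sqrt(2))).
static float gelu_exact(float z) {
  return 0.5f * z * (1.0f + std::erf(z / std::sqrt(2.0f)));
}

// Tanh approximation with the same constants as GELU_taylor<float>.
static float gelu_tanh(float z) {
  const float k0 = 0.7978845608028654f;  // sqrt(2 / pi)
  const float k1 = 0.044715f;
  return 0.5f * z * (1.0f + std::tanh(k0 * z * (1.0f + k1 * z * z)));
}

int main() {
  float max_err = 0.0f;
  for (float z = -6.0f; z <= 6.0f; z += 0.01f) {
    max_err = std::fmax(max_err, std::fabs(gelu_tanh(z) - gelu_exact(z)));
  }
  std::printf("max |gelu_tanh - gelu_exact| = %g\n", max_err);
  return 0;
}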