From 486d5cd05b0cdfdbe2f35a7b4b7d445da6171dbc Mon Sep 17 00:00:00 2001
From: Your
Date: Mon, 25 Mar 2024 21:24:49 +0000
Subject: [PATCH] update

---
Reviewer notes (text in this area is ignored by `git am` and is not part of
the commit message):

* What the diff does: it inverts the preprocessor guard around the
  run_moe_fc definition that follows it. Before, the real body was compiled
  only when `defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700`, and every other
  case (including `__CUDA_ARCH__` being undefined) got the empty stub. After,
  the empty stub is compiled only when
  `defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700`, and every other case
  (including `__CUDA_ARCH__` being undefined) gets the real body.
* The stub definition is moved unchanged from the `#else` branch to the `#if`
  branch; the real body appears only as diff context and is not modified.
* NOTE(review): nvcc defines `__CUDA_ARCH__` only while compiling device
  code, so presumably the intent is that the host compilation pass now sees
  the real body instead of the no-op stub. The subject line "update" does not
  say so; confirm with the author.
* NOTE(review): this copy of the patch looks damaged. The `From:` header has
  no address, every `template` line has no parameter list even though the
  bodies use `T` and `WeightType`, and the original line breaks and leading
  whitespace were lost. The indentation below is reconstructed, not
  authoritative; regenerate the patch from the commit before applying it.

 .../contrib_ops/cuda/moe/ft_moe/moe_kernel.cu | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
index 6868a2afaa42b..aedc9043aae3f 100644
--- a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
@@ -778,7 +778,16 @@ void CutlassMoeFCRunner::run_moe_fc(
       hidden_size, inter_size, local_num_experts, stream);
 }
 
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
+template
+void CutlassMoeFCRunner::run_moe_fc(const T*, const T*, const WeightType*, const T*, const T*,
+                                    ActivationType, const WeightType*, const T*, const T*,
+                                    const WeightType*, const T*, int, const int, const int, int,
+                                    int, int, int k, char*, T*, T*, int*, int*, cudaStream_t) {
+  // MoE gemm only supports Volta+ architectures
+  ;
+}
+#else
 template
 void CutlassMoeFCRunner::run_moe_fc(
     const T* input_activations, const T* gating_output, const WeightType* fc1_expert_weights, const T* fc1_scales,
@@ -792,15 +801,6 @@ void CutlassMoeFCRunner::run_moe_fc(
              inter_size, num_experts, local_num_experts, local_experts_start_index, k, workspace_ptr, fc2_result,
              nullptr, num_rows, expert_scales, expanded_source_row_to_expanded_dest_row, expert_for_source_row, stream);
 }
-#else
-template
-void CutlassMoeFCRunner::run_moe_fc(const T*, const T*, const WeightType*, const T*, const T*,
-                                    ActivationType, const WeightType*, const T*, const T*,
-                                    const WeightType*, const T*, int, const int, const int, int,
-                                    int, int, int k, char*, T*, T*, int*, int*, cudaStream_t) {
-  // MoE gemm only supports Volta+ architectures
-  ;
-}
 #endif
 
 template