From 486d5cd05b0cdfdbe2f35a7b4b7d445da6171dbc Mon Sep 17 00:00:00 2001
From: Your <wangye@microsoft.com>
Date: Mon, 25 Mar 2024 21:24:49 +0000
Subject: [PATCH] Select no-op run_moe_fc stub only when __CUDA_ARCH__ < 700

---
 .../contrib_ops/cuda/moe/ft_moe/moe_kernel.cu | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
index 6868a2afaa42b..aedc9043aae3f 100644
--- a/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
+++ b/onnxruntime/contrib_ops/cuda/moe/ft_moe/moe_kernel.cu
@@ -778,7 +778,16 @@ void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(
                               hidden_size, inter_size, local_num_experts, stream);
 }
 
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
+template <typename T, typename WeightType, typename Enable>
+void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(const T*, const T*, const WeightType*, const T*, const T*,
+                                                           ActivationType, const WeightType*, const T*, const T*,
+                                                           const WeightType*, const T*, int, const int, const int, int,
+                                                           int, int, int k, char*, T*, T*, int*, int*, cudaStream_t) {
+  // Intentional no-op stub: MoE gemm only supports Volta+ architectures (selected when __CUDA_ARCH__ < 700).
+  ;
+}
+#else
 template <typename T, typename WeightType, typename Enable>
 void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(
     const T* input_activations, const T* gating_output, const WeightType* fc1_expert_weights, const T* fc1_scales,
@@ -792,15 +801,6 @@ void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(
              inter_size, num_experts, local_num_experts, local_experts_start_index, k, workspace_ptr, fc2_result,
              nullptr, num_rows, expert_scales, expanded_source_row_to_expanded_dest_row, expert_for_source_row, stream);
 }
-#else
-template <typename T, typename WeightType, typename Enable>
-void CutlassMoeFCRunner<T, WeightType, Enable>::run_moe_fc(const T*, const T*, const WeightType*, const T*, const T*,
-                                                           ActivationType, const WeightType*, const T*, const T*,
-                                                           const WeightType*, const T*, int, const int, const int, int,
-                                                           int, int, int k, char*, T*, T*, int*, int*, cudaStream_t) {
-  // MoE gemm only supports Volta+ architectures
-  ;
-}
 #endif
 
 template <typename T, typename WeightType, typename Enable>