diff --git a/onnxruntime/contrib_ops/cuda/bert/packed_attention.cc b/onnxruntime/contrib_ops/cuda/bert/packed_attention.cc
index 55deed55dfd33..e4b90727121cf 100644
--- a/onnxruntime/contrib_ops/cuda/bert/packed_attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/packed_attention.cc
@@ -304,7 +304,7 @@ Status PackedAttention<T>::ComputeInternal(OpKernelContext* context) const {
   int m = parameters.token_count;
   int n = parameters.hidden_size + parameters.hidden_size + parameters.v_hidden_size;
   int k = parameters.input_hidden_size;
-  gemm_buffer = this->GetScratchBuffer<T>(static_cast<size_t>(m) * n, context->GetComputeStream());
+  gemm_buffer = this->template GetScratchBuffer<T>(static_cast<size_t>(m) * n, context->GetComputeStream());
 
   cublasHandle_t cublas = this->GetCublasHandle(context);
 
@@ -328,7 +328,7 @@ Status PackedAttention<T>::ComputeInternal(OpKernelContext* context) const {
                                                  false,
                                                  use_memory_efficient_attention,
                                                  no_qkv_workspace);
-  auto work_space = this->GetScratchBuffer<void>(workSpaceSize, context->GetComputeStream());
+  auto work_space = this->template GetScratchBuffer<void>(workSpaceSize, context->GetComputeStream());
 
   typedef typename ToCudaType<T>::MappedType CudaT;
   PackedAttentionData<CudaT> data;
diff --git a/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention.cc b/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention.cc
index b4a162989978c..00ab32886112b 100644
--- a/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/packed_multihead_attention.cc
@@ -298,7 +298,7 @@ Status PackedMultiHeadAttention<T>::ComputeInternal(OpKernelContext* context) co
                                                                 use_flash_attention,
                                                                 use_memory_efficient_attention,
                                                                 no_qkv_workspace);
-  auto work_space = this->GetScratchBuffer<void>(workSpaceSize, context->GetComputeStream());
+  auto work_space = this->template GetScratchBuffer<void>(workSpaceSize, context->GetComputeStream());
 
   typedef typename ToCudaType<T>::MappedType CudaT;
   PackedMultiHeadAttentionData<CudaT> data;
diff --git
a/onnxruntime/contrib_ops/cuda/moe/moe_base.h b/onnxruntime/contrib_ops/cuda/moe/moe_base.h
index 4ecf1a6206643..4a407fa1b2159 100644
--- a/onnxruntime/contrib_ops/cuda/moe/moe_base.h
+++ b/onnxruntime/contrib_ops/cuda/moe/moe_base.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include "core/common/common.h"
+#include "core/framework/tensor_shape.h"
 #include "core/framework/op_kernel.h"
 #include "contrib_ops/cuda/moe/ft_moe/moe_gemm_kernels.h"
 
@@ -130,14 +131,14 @@ class MoEBase {
         fc3_experts_weights_optional->Shape().GetDims() != fc1_experts_weights_dims) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                              "fc3_experts_weights_dims must be equal to fc1_experts_weights_dims, got ",
-                             fc3_experts_weights_optional->Shape().GetDims(), " and ", fc1_experts_weights_dims);
+                             fc3_experts_weights_optional->Shape(), " and ", TensorShape(fc1_experts_weights_dims));
     }
 
     if (fc3_experts_bias_optional != nullptr && fc1_experts_bias_optional != nullptr &&
         fc3_experts_bias_optional->Shape().GetDims() != fc1_experts_bias_optional->Shape().GetDims()) {
       return ORT_MAKE_STATUS(
           ONNXRUNTIME, INVALID_ARGUMENT, "fc3_experts_bias_dims must be equal to fc1_experts_bias_dims, got ",
-          fc3_experts_bias_optional->Shape().GetDims(), " and ", fc1_experts_bias_optional->Shape().GetDims());
+          fc3_experts_bias_optional->Shape(), " and ", fc1_experts_bias_optional->Shape());
     }
 
     parameters.num_rows = num_rows;
@@ -199,7 +200,7 @@ class MoEBase {
     if (fc3_experts_scales != nullptr && fc1_experts_scales_dims != fc3_experts_scales->Shape().GetDims()) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                              "fc3_experts_scales must be equal to fc1_experts_scales, got ",
-                             fc3_experts_scales->Shape().GetDims(), " and ", fc1_experts_scales_dims);
+                             fc3_experts_scales->Shape(), " and ", TensorShape(fc1_experts_scales_dims));
     }
 
     return Status::OK();