diff --git a/onnxruntime/contrib_ops/rocm/bert/multihead_attention.cu b/onnxruntime/contrib_ops/rocm/bert/multihead_attention.cu index a8d7bb2672b4b..ea5cac9e30387 100644 --- a/onnxruntime/contrib_ops/rocm/bert/multihead_attention.cu +++ b/onnxruntime/contrib_ops/rocm/bert/multihead_attention.cu @@ -123,7 +123,8 @@ Status MultiHeadAttention::ComputeInternal(OpKernelContext* context) const { key_padding_mask, relative_position_bias, past_key, past_value, past_seq_len, &attn, - num_heads_, is_unidirectional_, mask_filter_value_, scale_, + num_heads_, false, /*is_unidirectional_*/ + mask_filter_value_, scale_, past_present_share_buffer_, false, device_prop.maxThreadsPerBlock)); if (attn_type_ == kDecoderMaskedMultiHeadAttention && attn.sequence_length != 1) {