From c1ad08e6d3bfd80427fb7f19d6bd9454e308e4a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E5=AE=87=E6=89=AC?= Date: Fri, 19 Jul 2024 18:20:17 +0800 Subject: [PATCH] =?UTF-8?q?deep=20seek=20v2=E6=B7=BB=E5=8A=A0linear=20name?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/deepseekv2.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/models/deepseekv2.cpp b/src/models/deepseekv2.cpp index 8e7530e..6c53770 100644 --- a/src/models/deepseekv2.cpp +++ b/src/models/deepseekv2.cpp @@ -32,6 +32,18 @@ namespace fastllm { this->history_sep = ""; weight.embeddingNames.insert("model.embed_tokens.weight"); + weight.linearNames = { + "lm_head.weight", "model.layers.*.mlp*down_proj.weight", "model.layers.*.mlp*up_proj.weight", + "model.layers.*.mlp*gate_proj.weight", + "model.layers.*.self_attn.q_proj.weight", + "model.layers.*.self_attn.q_a_proj.weight", + "model.layers.*.self_attn.q_b_proj.weight", + "model.layers.*.self_attn.kv_a_proj_with_mqa.weight", + "model.layers.*.self_attn.kv_b_proj.weight", + "model.layers.*.self_attn.o_proj.weight", "model.layers.*.self_attn.q_proj.weight", "model.layers.*.self_attn.k_proj.weight", + "model.layers.*.self_attn.v_proj.weight", "model.layers.*.self_attn.mergeqkv.weight", "model.layers.*.self_attn.W_pack.weight", + "model.layers.*.mlp.gate.weight" + }; } void DeepSeekV2Model::InitParams() {