Skip to content

Commit

Permalink
deep seek v2添加linear names
Browse files Browse the repository at this point in the history
  • Loading branch information
黄宇扬 committed Jul 19, 2024
1 parent 9c018ed commit c1ad08e
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/models/deepseekv2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ namespace fastllm {
this->history_sep = "</s>";

weight.embeddingNames.insert("model.embed_tokens.weight");
// Name patterns of the weights registered in weight.linearNames.
// NOTE(review): '*' presumably acts as a wildcard in whatever matcher consumes
// these names (e.g. the layer index, or the sub-path between "mlp" and the
// projection name) -- confirm against the code that reads linearNames.
// The previous list contained "model.layers.*.self_attn.q_proj.weight" twice;
// the redundant second entry has been removed.
weight.linearNames = {
    // Output head.
    "lm_head.weight",
    // MLP projections.
    "model.layers.*.mlp*down_proj.weight",
    "model.layers.*.mlp*up_proj.weight",
    "model.layers.*.mlp*gate_proj.weight",
    // Self-attention projections.
    "model.layers.*.self_attn.q_proj.weight",
    "model.layers.*.self_attn.q_a_proj.weight",
    "model.layers.*.self_attn.q_b_proj.weight",
    "model.layers.*.self_attn.kv_a_proj_with_mqa.weight",
    "model.layers.*.self_attn.kv_b_proj.weight",
    "model.layers.*.self_attn.o_proj.weight",
    "model.layers.*.self_attn.k_proj.weight",
    "model.layers.*.self_attn.v_proj.weight",
    "model.layers.*.self_attn.mergeqkv.weight",
    "model.layers.*.self_attn.W_pack.weight",
    // MLP gate weight.
    "model.layers.*.mlp.gate.weight"
};
}

void DeepSeekV2Model::InitParams() {
Expand Down

0 comments on commit c1ad08e

Please sign in to comment.