From 1e92f5490e2f7c89956bf01fdf0f3fd228e6e081 Mon Sep 17 00:00:00 2001
From: 黄宇扬
Date: Fri, 19 Jul 2024 15:28:12 +0800
Subject: [PATCH] Add a Python custom-model example (Qwen2 model)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
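Notes (kept below the fold so they are not part of the commit message): the
example hard-codes root_path = "/mnt/hfmodels/", so point it at a directory
that actually contains Qwen/Qwen2-7B-Instruct before running the script.
A minimal sketch of reusing the graph class from another script, assuming the
ftllm package is installed and that custom_model.py is importable from the
working directory (the import path is illustrative, not part of this patch):

    from ftllm import llm
    from custom_model import Qwen2Model  # illustrative import of the file added below

    model = llm.model("/mnt/hfmodels/Qwen/Qwen2-7B-Instruct", graph = Qwen2Model)
    messages = [{"role": "user", "content": "Hello"}]
    for piece in model.stream_response(messages, one_by_one = True):
        print(piece, end = "", flush = True)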
 example/python/custom_model.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 example/python/custom_model.py

diff --git a/example/python/custom_model.py b/example/python/custom_model.py
new file mode 100644
index 00000000..b4253e2c
--- /dev/null
+++ b/example/python/custom_model.py
@@ -0,0 +1,54 @@
+from ftllm import llm
+from ftllm.llm import ComputeGraph
+import os
+import math
+
+root_path = "/mnt/hfmodels/"
+model_path = os.path.join(root_path, "Qwen/Qwen2-7B-Instruct")
+
+class Qwen2Model(ComputeGraph):
+    def build(self):
+        weight, data, config = self.weight, self.data, self.config
+        head_dim = config["hidden_size"] // config["num_attention_heads"]
+        # Token embedding lookup, then cast the hidden states to the runtime compute dtype.
+        self.Embedding(data["inputIds"], weight["model.embed_tokens.weight"], data["hiddenStates"])
+        self.DataTypeAs(data["hiddenStates"], data["atype"])
+        for i in range(config["num_hidden_layers"]):
+            pastKey = data["pastKey."][i]
+            pastValue = data["pastValue."][i]
+            layer = weight["model.layers."][i]
+            # Self-attention: pre-norm, QKV projections, RoPE, fused attention over the KV cache, output projection, residual add.
+            self.RMSNorm(data["hiddenStates"], layer[".input_layernorm.weight"], config["rms_norm_eps"], data["attenInput"])
+            self.Linear(data["attenInput"], layer[".self_attn.q_proj.weight"], layer[".self_attn.q_proj.bias"], data["q"])
+            self.Linear(data["attenInput"], layer[".self_attn.k_proj.weight"], layer[".self_attn.k_proj.bias"], data["k"])
+            self.Linear(data["attenInput"], layer[".self_attn.v_proj.weight"], layer[".self_attn.v_proj.bias"], data["v"])
+            self.ExpandHead(data["q"], head_dim)
+            self.ExpandHead(data["k"], head_dim)
+            self.ExpandHead(data["v"], head_dim)
+            self.LlamaRotatePosition2D(data["q"], data["positionIds"], data["sin"], data["cos"], head_dim // 2)
+            self.LlamaRotatePosition2D(data["k"], data["positionIds"], data["sin"], data["cos"], head_dim // 2)
+            self.FusedAttention(data["q"], pastKey, pastValue, data["k"], data["v"], data["attenInput"],
+                                data["attentionMask"], data["attenOutput"], data["seqLens"], 1.0 / math.sqrt(head_dim))
+            self.Linear(data["attenOutput"], layer[".self_attn.o_proj.weight"], layer[".self_attn.o_proj.bias"], data["attenLastOutput"])
+            self.AddTo(data["hiddenStates"], data["attenLastOutput"])
+            # Feed-forward: pre-norm, SwiGLU-style gate/up/down projections, residual add.
+            self.RMSNorm(data["hiddenStates"], layer[".post_attention_layernorm.weight"], config["rms_norm_eps"], data["attenInput"])
+            self.Linear(data["attenInput"], layer[".mlp.gate_proj.weight"], layer[".mlp.gate_proj.bias"], data["w1"])
+            self.Linear(data["attenInput"], layer[".mlp.up_proj.weight"], layer[".mlp.up_proj.bias"], data["w3"])
+            self.Silu(data["w1"], data["w1"])
+            self.MulTo(data["w1"], data["w3"])
+            self.Linear(data["w1"], layer[".mlp.down_proj.weight"], layer[".mlp.down_proj.bias"], data["w2"])
+            self.AddTo(data["hiddenStates"], data["w2"])
+        # Apply the final norm and lm_head only to the last token of each sequence.
+        self.SplitLastTokenStates(data["hiddenStates"], data["seqLens"], data["lastTokensStates"])
+        self.RMSNorm(data["lastTokensStates"], weight["model.norm.weight"], config["rms_norm_eps"], data["lastTokensStates"])
+        self.Linear(data["lastTokensStates"], weight["lm_head.weight"], weight["lm_head.bias"], data["logits"])
+
+model = llm.model(model_path, graph = Qwen2Model)
+prompt = "What tourist attractions are there in Beijing?"
+messages = [
+    {"role": "system", "content": "You are an AI that loves to speak English. No matter what language I use, you will reply to me in English."},
+    {"role": "user", "content": prompt}
+]
+for response in model.stream_response(messages, one_by_one = True):
+    print(response, flush = True, end = "")