Merge pull request #496 from TylunasLi/doc
Support loading Qwen2.5-series HF models directly in C++
ztxz16 authored Nov 27, 2024
2 parents 0886280 + f9ea416 commit ddbd6db
Showing 5 changed files with 30 additions and 6 deletions.
9 changes: 8 additions & 1 deletion docs/models.md
@@ -67,6 +67,13 @@
| Qwen/Qwen2-1.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
| Qwen/Qwen2-7B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
| Qwen/Qwen2-72B-Instruct | | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-0.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-1.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-3B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-7B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-14B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-32B-Instruct ||||
+| Qwen/Qwen2.5-72B-Instruct | |||

> Note 3: An update is needed; check that `tokenizer_config.json` is the latest version
@@ -241,7 +248,7 @@
python3 tools/qwen2flm.py qwen-7b-int8.flm int8 # export the int8 model
python3 tools/qwen2flm.py qwen-7b-int4.flm int4 # export the int4 model
```

-* **Qwen1.5**
+* **Qwen1.5 / Qwen2 / Qwen2.5**

```sh
# The Qwen2 environment (transformers >= 4.37.0) must be installed first
2 changes: 1 addition & 1 deletion example/Win32Demo/Win32Demo.cpp
@@ -133,7 +133,7 @@ int initLLMConf(RunConfig config) {
generationConfig->stop_token_ids.insert(model->weight.tokenizer.GetTokenId(it));
}
std::string systemConfig = config.systemPrompt;
-messages = new fastllm::ChatMessages({{"system", systemConfig}});
+messages = systemConfig.empty() ? new fastllm::ChatMessages() : new fastllm::ChatMessages({{"system", systemConfig}});

modelType = model->model_type;
runType = config.webuiType ? RUN_TYPE_WEBUI : RUN_TYPE_CONSOLE;
3 changes: 2 additions & 1 deletion example/webui/webui.cpp
@@ -149,7 +149,8 @@ int main(int argc, char** argv) {
locker.lock();
if (sessions.find(uuid) == sessions.end()) {
sessions[uuid] = new ChatSession();
-sessions[uuid]->messages.push_back({"system", config.systemPrompt});
+if (!config.systemPrompt.empty())
+    sessions[uuid]->messages.push_back({"system", config.systemPrompt});
}
auto *session = sessions[uuid];
locker.unlock();
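
Both hunks above apply the same guard: a `system` turn is only seeded into the chat history when a system prompt is actually configured, presumably so the model's own chat-template default applies otherwise. A minimal sketch of the pattern, using a `std::vector` of role/content pairs as a stand-in for `fastllm::ChatMessages` (an assumption, not the library's real type):

```cpp
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Stand-in for fastllm::ChatMessages, assumed here to behave like a
// list of {role, content} pairs.
using ChatMessages = std::vector<std::pair<std::string, std::string>>;

// Seed the history with a "system" turn only when a prompt is configured,
// mirroring the guards added in Win32Demo.cpp and webui.cpp above.
std::unique_ptr<ChatMessages> makeMessages(const std::string &systemPrompt) {
    if (systemPrompt.empty()) {
        return std::make_unique<ChatMessages>();
    }
    return std::make_unique<ChatMessages>(
        ChatMessages{{"system", systemPrompt}});
}
```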
1 change: 1 addition & 0 deletions src/models/basellm.cpp
@@ -1143,6 +1143,7 @@ printf("len = %d, spend = %f s. tokens / s = %f\n", (int)total, spend, (float)to
});
}
ret["add_generation_prompt"] = fastllm::JinjaVar{1};
ret["tools"] = fastllm::JinjaVar{std::vector <JinjaVar>()};
return ret;
}

21 changes: 18 additions & 3 deletions src/template.cpp
Expand Up @@ -12,6 +12,8 @@ namespace fastllm {
return true;
} else if (this->stringValue == "false") {
return false;
+} else if (this->type == JinjaArray) {
+return !this->arrayValue.empty();
}
ErrorInFastLLM("Jinja error: " + this->Dump() + " is not bool.");
return false;
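
The two lines added above make a `JinjaArray` truthy only when it is non-empty, matching Python/Jinja2 semantics. Combined with the empty `tools` default added in src/models/basellm.cpp, a Qwen2.5-style chat template that tests `{% if tools %}` now skips its tool-calling branch instead of failing on an undefined variable. A minimal sketch with a simplified stand-in type (fastllm's real `JinjaVar` covers more cases):

```cpp
#include <cassert>
#include <vector>

// Simplified stand-in for fastllm's JinjaVar: only the array case matters
// here. After this commit an array is truthy iff it is non-empty.
struct Var {
    std::vector<Var> arrayValue;
    bool BoolValue() const { return !arrayValue.empty(); }
};

int main() {
    Var tools;                    // the empty default set in basellm.cpp
    assert(!tools.BoolValue());   // {% if tools %} -> branch skipped
    tools.arrayValue.push_back(Var());
    assert(tools.BoolValue());    // non-empty -> branch taken
    return 0;
}
```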
@@ -108,9 +110,9 @@ namespace fastllm {
AssertInFastLLM(value[len - 1] == '}' && value[len - 2] == (value[1] == '%' ? '%' : '}'),
"Jinja block error: " + value);
int st = 2, end = len - 2;
-if (value[1] == '%' && value[2] == '-')
+if (value[2] == '-')
st = 3;
-if (value[len - 2] == '%' && value[len - 3] == '-')
+if (value[len - 3] == '-')
end = len - 3;
while (st < end) {
char now = value[st];
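
This hunk relaxes the whitespace-control parsing: the leading and trailing `-` markers are now recognized in expression blocks (`{{- ... -}}`) as well as statement blocks (`{%- ... -%}`), a style Qwen2.5's chat template uses heavily. A sketch of just the span computation under the new rule (the dashes request whitespace trimming in Jinja; only the extraction of the inner text is sketched here):

```cpp
#include <cassert>
#include <string>

// Compute the inner text of a Jinja block, skipping whitespace-control
// dashes on either side, regardless of whether the block is "{% ... %}"
// or "{{ ... }}".
std::string innerText(const std::string &value) {
    int len = (int)value.size();
    int st = 2, end = len - 2;
    if (value[2] == '-') st = 3;               // "{%-" or "{{-"
    if (value[len - 3] == '-') end = len - 3;  // "-%}" or "-}}"
    return value.substr(st, end - st);
}

int main() {
    assert(innerText("{{- name -}}") == " name ");  // now handled correctly
    assert(innerText("{%- if x %}") == " if x ");
    return 0;
}
```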
@@ -145,7 +147,7 @@ namespace fastllm {
}
if (value[j] == '\\') {
AssertInFastLLM(j + 1 < end, "Jinja error: parse string failed: " + value.substr(st, std::min(10, (int)value.size() - st)));
-cur += escapeChars[value[j + 1]];
+cur += escapeChars[value[++j]];
} else {
cur += value[j];
}
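
The `++j` is the fix: it steps the loop index past the escaped character, so the next iteration does not append that character again as a literal. A self-contained sketch of the corrected loop (simplified escape table; the real parser checks bounds with `AssertInFastLLM` and supports more escapes):

```cpp
#include <cassert>
#include <map>
#include <string>

// Translate backslash escapes in a string, consuming both characters of
// each escape pair.
std::string unescape(const std::string &s) {
    static std::map<char, char> escapeChars = {
        {'n', '\n'}, {'t', '\t'}, {'\\', '\\'}, {'\'', '\''}};
    std::string cur;
    for (size_t j = 0; j < s.size(); j++) {
        if (s[j] == '\\' && j + 1 < s.size()) {
            cur += escapeChars[s[++j]];  // ++j consumes the escaped char
        } else {
            cur += s[j];
        }
    }
    return cur;
}

int main() {
    // The pre-fix code produced "a\nnb": the newline plus a stray 'n'.
    assert(unescape("a\\nb") == "a\nb");
    return 0;
}
```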
@@ -259,6 +261,8 @@ namespace fastllm {
if (type == JinjaToken::JinjaTokenOr) {
return -2;
} else if (type == JinjaToken::JinjaTokenAnd) {
+return -2;
+} else if (type == JinjaToken::JinjaTokenNot) {
return -1;
} else if (type == JinjaToken::JinjaTokenEqual || type == JinjaToken::JinjaTokenNotEqual) {
return 0;
@@ -270,6 +274,8 @@ namespace fastllm {
return 3;
} else if (type == JinjaToken::JinjaTokenDOT) {
return 4;
+} else if (type == JinjaToken::JinjaTokenLSB || type == JinjaToken::JinjaTokenLMB) {
+return -5;
} else {
ErrorInFastLLM("Jinja error: unsupport op: " + std::to_string(type));
return -1;
@@ -348,6 +354,7 @@ namespace fastllm {
tokens[i].type == JinjaToken::JinjaTokenIn ||
tokens[i].type == JinjaToken::JinjaTokenAnd ||
tokens[i].type == JinjaToken::JinjaTokenOr ||
+tokens[i].type == JinjaToken::JinjaTokenNot ||
tokens[i].type == JinjaToken::JinjaTokenFliter) {
while (ops.size() > 0 && GetOpLevel(ops.back().type) > GetOpLevel(tokens[i].type)) {
suffixExp.push_back(ops.back());
@@ -418,6 +425,14 @@ namespace fastllm {
} else {
ErrorInFastLLM("Jinja Error: unsupport filter " + b.stringValue);
}
+} else if (it.type == JinjaToken::JinjaTokenNot) {
+AssertInFastLLM(vars.size() >= 1, "Jinja Error: expression error.");
+JinjaVar a = vars.back();
+if (a.type == JinjaVar::JinjaNone) {
+a = local[a];
+}
+vars.pop_back();
+vars.push_back(a.type == JinjaVar::JinjaNone ? JinjaVar(1) : JinjaVar(!a.BoolValue()));
} else if (it.type == JinjaToken::JinjaTokenAdd ||
it.type == JinjaToken::JinjaTokenSub ||
it.type == JinjaToken::JinjaTokenMul ||
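
The last hunk evaluates `not` on the expression stack: the operand is popped, resolved against the local scope if it is still an unresolved name, and replaced by its negation, with a variable that is still undefined after lookup counting as falsy, so `not undefined` yields true. The precedence values above place `not` (-1) between `and`/`or` (-2) and the comparisons (0), so `not a == b` parses as `not (a == b)`, matching Jinja2. A minimal stack-machine sketch with stand-in types, not fastllm's real API:

```cpp
#include <cassert>
#include <vector>

// Simplified stand-in for fastllm's JinjaVar; the real type has more cases.
struct Var {
    enum Type { None, Int, Array } type = None;
    long long intValue = 0;
    std::vector<Var> arrayValue;
    bool BoolValue() const {
        if (type == Int) return intValue != 0;
        if (type == Array) return !arrayValue.empty();
        return false;  // None (undefined) is falsy
    }
};

// Pop the operand and push its negation; "not undefined" evaluates to true,
// mirroring the JinjaNone check in the new JinjaTokenNot branch above.
void evalNot(std::vector<Var> &vars) {
    Var a = vars.back();
    vars.pop_back();
    Var r;
    r.type = Var::Int;
    r.intValue = (a.type == Var::None) ? 1 : !a.BoolValue();
    vars.push_back(r);
}

int main() {
    std::vector<Var> vars;
    vars.push_back(Var{});              // an undefined variable
    evalNot(vars);
    assert(vars.back().intValue == 1);  // not undefined -> true
    return 0;
}
```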
