diff --git a/docs/models.md b/docs/models.md
index 21501010..bafa09a9 100644
--- a/docs/models.md
+++ b/docs/models.md
@@ -67,6 +67,13 @@
 | Qwen/Qwen2-1.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
 | Qwen/Qwen2-7B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
 | Qwen/Qwen2-72B-Instruct | | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-0.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-1.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-3B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-7B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-14B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-32B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) | ✔ |
+| Qwen/Qwen2.5-72B-Instruct | | [✔](#qwen模型导出) | ✔ |
 
 > Note 3: needs an update; check that `tokenizer_config.json` is the latest version
 
@@ -241,7 +248,7 @@
 python3 tools/qwen2flm.py qwen-7b-int8.flm int8 # export the int8 model
 python3 tools/qwen2flm.py qwen-7b-int4.flm int4 # export the int4 model
 ```
 
-* **Qwen1.5**
+* **Qwen1.5 / Qwen2 / Qwen2.5**
 ```sh
 # Install the Qwen2 environment first (transformers >= 4.37.0)
diff --git a/example/Win32Demo/Win32Demo.cpp b/example/Win32Demo/Win32Demo.cpp
index d656e273..d767a1b0 100644
--- a/example/Win32Demo/Win32Demo.cpp
+++ b/example/Win32Demo/Win32Demo.cpp
@@ -133,7 +133,7 @@ int initLLMConf(RunConfig config) {
         generationConfig->stop_token_ids.insert(model->weight.tokenizer.GetTokenId(it));
     }
     std::string systemConfig = config.systemPrompt;
-    messages = new fastllm::ChatMessages({{"system", systemConfig}});
+    messages = systemConfig.empty() ? new fastllm::ChatMessages() : new fastllm::ChatMessages({{"system", systemConfig}});
 
     modelType = model->model_type;
     runType = config.webuiType ? RUN_TYPE_WEBUI : RUN_TYPE_CONSOLE;
diff --git a/example/webui/webui.cpp b/example/webui/webui.cpp
index 6afa6202..da003301 100644
--- a/example/webui/webui.cpp
+++ b/example/webui/webui.cpp
@@ -149,7 +149,8 @@ int main(int argc, char** argv) {
         locker.lock();
         if (sessions.find(uuid) == sessions.end()) {
             sessions[uuid] = new ChatSession();
-            sessions[uuid]->messages.push_back({"system", config.systemPrompt});
+            if (!config.systemPrompt.empty())
+                sessions[uuid]->messages.push_back({"system", config.systemPrompt});
         }
         auto *session = sessions[uuid];
         locker.unlock();
diff --git a/src/models/basellm.cpp b/src/models/basellm.cpp
index ea0e6938..1494cc99 100644
--- a/src/models/basellm.cpp
+++ b/src/models/basellm.cpp
@@ -1143,6 +1143,7 @@ printf("len = %d, spend = %f s. tokens / s = %f\n", (int)total, spend, (float)to
             });
         }
         ret["add_generation_prompt"] = fastllm::JinjaVar{1};
+        ret["tools"] = fastllm::JinjaVar{std::vector <JinjaVar> ()};
         return ret;
     }
 
diff --git a/src/template.cpp b/src/template.cpp
index 742bb4e1..3ba85db7 100644
--- a/src/template.cpp
+++ b/src/template.cpp
@@ -12,6 +12,8 @@ namespace fastllm {
             return true;
         } else if (this->stringValue == "false") {
             return false;
+        } else if (this->type == JinjaArray) {
+            return !this->arrayValue.empty();
         }
         ErrorInFastLLM("Jinja error: " + this->Dump() + " is not bool.");
         return false;
@@ -108,9 +110,9 @@
         AssertInFastLLM(value[len - 1] == '}' && value[len - 2] == (value[1] == '%' ? '%' : '}'),
                         "Jinja block error: " + value);
         int st = 2, end = len - 2;
-        if (value[1] == '%' && value[2] == '-')
+        if (value[2] == '-')
             st = 3;
-        if (value[len - 2] == '%' && value[len - 3] == '-')
+        if (value[len - 3] == '-')
             end = len - 3;
         while (st < end) {
             char now = value[st];
@@ -145,7 +147,7 @@
             }
             if (value[j] == '\\') {
                 AssertInFastLLM(j + 1 < end, "Jinja error: parse string failed: " + value.substr(st, std::min(10, (int)value.size() - st)));
-                cur += escapeChars[value[j + 1]];
+                cur += escapeChars[value[++j]];
             } else {
                 cur += value[j];
             }
@@ -259,6 +261,8 @@
             if (type == JinjaToken::JinjaTokenOr) {
                 return -2;
             } else if (type == JinjaToken::JinjaTokenAnd) {
+                return -2;
+            } else if (type == JinjaToken::JinjaTokenNot) {
                 return -1;
             } else if (type == JinjaToken::JinjaTokenEqual || type == JinjaToken::JinjaTokenNotEqual) {
                 return 0;
@@ -270,6 +274,8 @@
                 return 3;
             } else if (type == JinjaToken::JinjaTokenDOT) {
                 return 4;
+            } else if (type == JinjaToken::JinjaTokenLSB || type == JinjaToken::JinjaTokenLMB) {
+                return -5;
             } else {
                 ErrorInFastLLM("Jinja error: unsupport op: " + std::to_string(type));
                 return -1;
@@ -348,6 +354,7 @@
                     tokens[i].type == JinjaToken::JinjaTokenIn ||
                     tokens[i].type == JinjaToken::JinjaTokenAnd ||
                     tokens[i].type == JinjaToken::JinjaTokenOr ||
+                    tokens[i].type == JinjaToken::JinjaTokenNot ||
                    tokens[i].type == JinjaToken::JinjaTokenFliter) {
                     while (ops.size() > 0 && GetOpLevel(ops.back().type) > GetOpLevel(tokens[i].type)) {
                         suffixExp.push_back(ops.back());
@@ -418,6 +425,14 @@
                 } else {
                     ErrorInFastLLM("Jinja Error: unsupport filter " + b.stringValue);
                 }
+            } else if (it.type == JinjaToken::JinjaTokenNot) {
+                AssertInFastLLM(vars.size() >= 1, "Jinja Error: expression error.");
+                JinjaVar a = vars.back();
+                if (a.type == JinjaVar::JinjaNone) {
+                    a = local[a];
+                }
+                vars.pop_back();
+                vars.push_back(a.type == JinjaVar::JinjaNone ? JinjaVar(1) : JinjaVar(!a.BoolValue()));
             } else if (it.type == JinjaToken::JinjaTokenAdd ||
                        it.type == JinjaToken::JinjaTokenSub ||
                        it.type == JinjaToken::JinjaTokenMul ||
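
The empty `tools` array added in basellm.cpp and the new `JinjaArray` branch in `BoolValue()` work together: Qwen2.5's chat template guards its tool-calling preamble with `{%- if tools %}`, so the renderer must treat an empty array as false instead of raising "is not bool". Below is a minimal sketch of that truthiness rule, using a hypothetical `Var` type rather than fastllm's actual `JinjaVar`:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for JinjaVar, showing only the truthiness logic.
struct Var {
    enum Type { None, String, Array } type = None;
    std::string stringValue;
    std::vector<Var> arrayValue;

    bool BoolValue() const {
        if (type == String && stringValue == "true") return true;
        if (type == String && stringValue == "false") return false;
        if (type == Array) return !arrayValue.empty();  // the branch this diff adds
        throw std::runtime_error("is not bool");        // previous behavior for arrays
    }
};

int main() {
    Var tools;
    tools.type = Var::Array;  // `tools` defaults to [], as in basellm.cpp
    std::cout << std::boolalpha << tools.BoolValue() << "\n";  // false: skip tool preamble

    tools.arrayValue.push_back(Var{});  // a request that actually passes a tool
    std::cout << tools.BoolValue() << "\n";  // true: render the tool section
    return 0;
}
```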
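The precedence values added to `GetOpLevel` place `not` (-1) between `and`/`or` (-2) and the comparisons (0), so `not a == b` parses as `not (a == b)` while `not a and b` parses as `(not a) and b`, matching Jinja semantics. A self-contained sketch of that shunting-yard behavior follows; the helper names are hypothetical, only the precedence numbers and the pop-one-operand rule for `not` come from the diff:

```cpp
#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Precedence table mirroring the levels in GetOpLevel():
// or/and -> -2, not -> -1, ==/!= -> 0.
static int OpLevel(const std::string &op) {
    if (op == "or" || op == "and") return -2;
    if (op == "not") return -1;
    return 0;  // "==" and "!="
}

static bool IsOp(const std::string &tok) {
    return tok == "or" || tok == "and" || tok == "not" || tok == "==" || tok == "!=";
}

// Infix -> postfix; as in the diff, an incoming operator pops only operators
// of strictly higher precedence, so `not` stays below `==` on the stack.
static std::vector<std::string> ToPostfix(const std::vector<std::string> &tokens) {
    std::vector<std::string> out, ops;
    for (const std::string &tok : tokens) {
        if (!IsOp(tok)) { out.push_back(tok); continue; }
        while (!ops.empty() && OpLevel(ops.back()) > OpLevel(tok)) {
            out.push_back(ops.back());
            ops.pop_back();
        }
        ops.push_back(tok);
    }
    while (!ops.empty()) { out.push_back(ops.back()); ops.pop_back(); }
    return out;
}

// Postfix evaluation over booleans; `not` pops one operand (as in the new
// JinjaTokenNot branch), every other operator pops two.
static bool Eval(const std::vector<std::string> &postfix,
                 const std::map<std::string, bool> &vars) {
    std::vector<bool> st;
    for (const std::string &tok : postfix) {
        if (tok == "not") {
            bool a = st.back(); st.pop_back();
            st.push_back(!a);
        } else if (IsOp(tok)) {
            bool b = st.back(); st.pop_back();
            bool a = st.back(); st.pop_back();
            if (tok == "and") st.push_back(a && b);
            else if (tok == "or") st.push_back(a || b);
            else if (tok == "==") st.push_back(a == b);
            else st.push_back(a != b);  // "!="
        } else {
            st.push_back(vars.at(tok));
        }
    }
    return st.back();
}

int main() {
    std::map<std::string, bool> vars{{"a", true}, {"b", false}};
    // not binds looser than ==: "not a == b" means not (a == b) -> true.
    assert(Eval(ToPostfix({"not", "a", "==", "b"}), vars) == true);
    // not binds tighter than and: "not a and b" means (not a) and b -> false.
    assert(Eval(ToPostfix({"not", "a", "and", "b"}), vars) == false);
    std::cout << "precedence checks passed\n";
    return 0;
}
```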