Merge pull request #496 from TylunasLi/doc
Support loading Qwen2.5-series HF models directly in C++
ztxz16 authored Nov 27, 2024
2 parents 0886280 + f9ea416 commit ddbd6db
Showing 5 changed files with 30 additions and 6 deletions.
9 changes: 8 additions & 1 deletion docs/models.md
@@ -67,6 +67,13 @@
| Qwen/Qwen2-1.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
| Qwen/Qwen2-7B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
| Qwen/Qwen2-72B-Instruct | | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-0.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-1.5B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-3B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-7B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-14B-Instruct | [✔](#其它模型) | [✔](#qwen模型导出) ||
+| Qwen/Qwen2.5-32B-Instruct ||||
+| Qwen/Qwen2.5-72B-Instruct | |||

> Note 3: An update is needed; check that `tokenizer_config.json` is the latest version
@@ -241,7 +248,7 @@
python3 tools/qwen2flm.py qwen-7b-int8.flm int8 # export the int8 model
python3 tools/qwen2flm.py qwen-7b-int4.flm int4 # export the int4 model
```

-* **Qwen1.5**
+* **Qwen1.5 / Qwen2 / Qwen2.5**

```sh
# The Qwen2 environment (transformers >= 4.37.0) must be installed first
2 changes: 1 addition & 1 deletion example/Win32Demo/Win32Demo.cpp
@@ -133,7 +133,7 @@ int initLLMConf(RunConfig config) {
generationConfig->stop_token_ids.insert(model->weight.tokenizer.GetTokenId(it));
}
std::string systemConfig = config.systemPrompt;
-messages = new fastllm::ChatMessages({{"system", systemConfig}});
+messages = systemConfig.empty() ? new fastllm::ChatMessages() : new fastllm::ChatMessages({{"system", systemConfig}});

modelType = model->model_type;
runType = config.webuiType ? RUN_TYPE_WEBUI : RUN_TYPE_CONSOLE;
3 changes: 2 additions & 1 deletion example/webui/webui.cpp
@@ -149,7 +149,8 @@ int main(int argc, char** argv) {
locker.lock();
if (sessions.find(uuid) == sessions.end()) {
sessions[uuid] = new ChatSession();
-sessions[uuid]->messages.push_back({"system", config.systemPrompt});
+if (!config.systemPrompt.empty())
+    sessions[uuid]->messages.push_back({"system", config.systemPrompt});
}
auto *session = sessions[uuid];
locker.unlock();
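
Both hunks above apply the same guard: a `system` turn is only seeded into the chat history when a system prompt is actually configured, presumably so the model's own chat-template default applies otherwise. A minimal sketch of the pattern, using a `std::vector` of role/content pairs as a stand-in for `fastllm::ChatMessages` (an assumption, not the library's real type):

```cpp
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Stand-in for fastllm::ChatMessages, assumed here to behave like a
// list of {role, content} pairs.
using ChatMessages = std::vector<std::pair<std::string, std::string>>;

// Seed the history with a "system" turn only when a prompt is configured,
// mirroring the guards added in Win32Demo.cpp and webui.cpp above.
std::unique_ptr<ChatMessages> makeMessages(const std::string &systemPrompt) {
    if (systemPrompt.empty()) {
        return std::make_unique<ChatMessages>();
    }
    return std::make_unique<ChatMessages>(
        ChatMessages{{"system", systemPrompt}});
}
```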
1 change: 1 addition & 0 deletions src/models/basellm.cpp
@@ -1143,6 +1143,7 @@ printf("len = %d, spend = %f s. tokens / s = %f\n", (int)total, spend, (float)to
});
}
ret["add_generation_prompt"] = fastllm::JinjaVar{1};
ret["tools"] = fastllm::JinjaVar{std::vector <JinjaVar>()};
return ret;
}

21 changes: 18 additions & 3 deletions src/template.cpp
Expand Up @@ -12,6 +12,8 @@ namespace fastllm {
return true;
} else if (this->stringValue == "false") {
return false;
+} else if (this->type == JinjaArray) {
+return !this->arrayValue.empty();
}
ErrorInFastLLM("Jinja error: " + this->Dump() + " is not bool.");
return false;
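
The two lines added above make a `JinjaArray` truthy only when it is non-empty, matching Python/Jinja2 semantics. Combined with the empty `tools` default added in src/models/basellm.cpp, a Qwen2.5-style chat template that tests `{% if tools %}` now skips its tool-calling branch instead of failing on an undefined variable. A minimal sketch with a simplified stand-in type (fastllm's real `JinjaVar` covers more cases):

```cpp
#include <cassert>
#include <vector>

// Simplified stand-in for fastllm's JinjaVar: only the array case matters
// here. After this commit an array is truthy iff it is non-empty.
struct Var {
    std::vector<Var> arrayValue;
    bool BoolValue() const { return !arrayValue.empty(); }
};

int main() {
    Var tools;                    // the empty default set in basellm.cpp
    assert(!tools.BoolValue());   // {% if tools %} -> branch skipped
    tools.arrayValue.push_back(Var());
    assert(tools.BoolValue());    // non-empty -> branch taken
    return 0;
}
```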
@@ -108,9 +110,9 @@ namespace fastllm {
AssertInFastLLM(value[len - 1] == '}' && value[len - 2] == (value[1] == '%' ? '%' : '}'),
"Jinja block error: " + value);
int st = 2, end = len - 2;
-if (value[1] == '%' && value[2] == '-')
+if (value[2] == '-')
st = 3;
-if (value[len - 2] == '%' && value[len - 3] == '-')
+if (value[len - 3] == '-')
end = len - 3;
while (st < end) {
char now = value[st];
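
This hunk relaxes the whitespace-control parsing: the leading and trailing `-` markers are now recognized in expression blocks (`{{- ... -}}`) as well as statement blocks (`{%- ... -%}`), a style Qwen2.5's chat template uses heavily. A sketch of just the span computation under the new rule (the dashes request whitespace trimming in Jinja; only the extraction of the inner text is sketched here):

```cpp
#include <cassert>
#include <string>

// Compute the inner text of a Jinja block, skipping whitespace-control
// dashes on either side, regardless of whether the block is "{% ... %}"
// or "{{ ... }}".
std::string innerText(const std::string &value) {
    int len = (int)value.size();
    int st = 2, end = len - 2;
    if (value[2] == '-') st = 3;               // "{%-" or "{{-"
    if (value[len - 3] == '-') end = len - 3;  // "-%}" or "-}}"
    return value.substr(st, end - st);
}

int main() {
    assert(innerText("{{- name -}}") == " name ");  // now handled correctly
    assert(innerText("{%- if x %}") == " if x ");
    return 0;
}
```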
@@ -145,7 +147,7 @@ namespace fastllm {
}
if (value[j] == '\\') {
AssertInFastLLM(j + 1 < end, "Jinja error: parse string failed: " + value.substr(st, std::min(10, (int)value.size() - st)));
-cur += escapeChars[value[j + 1]];
+cur += escapeChars[value[++j]];
} else {
cur += value[j];
}
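
The `++j` is the fix: it steps the loop index past the escaped character, so the next iteration does not append that character again as a literal. A self-contained sketch of the corrected loop (simplified escape table; the real parser checks bounds with `AssertInFastLLM` and supports more escapes):

```cpp
#include <cassert>
#include <map>
#include <string>

// Translate backslash escapes in a string, consuming both characters of
// each escape pair.
std::string unescape(const std::string &s) {
    static std::map<char, char> escapeChars = {
        {'n', '\n'}, {'t', '\t'}, {'\\', '\\'}, {'\'', '\''}};
    std::string cur;
    for (size_t j = 0; j < s.size(); j++) {
        if (s[j] == '\\' && j + 1 < s.size()) {
            cur += escapeChars[s[++j]];  // ++j consumes the escaped char
        } else {
            cur += s[j];
        }
    }
    return cur;
}

int main() {
    // The pre-fix code produced "a\nnb": the newline plus a stray 'n'.
    assert(unescape("a\\nb") == "a\nb");
    return 0;
}
```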
@@ -259,6 +261,8 @@ namespace fastllm {
if (type == JinjaToken::JinjaTokenOr) {
return -2;
} else if (type == JinjaToken::JinjaTokenAnd) {
+return -2;
+} else if (type == JinjaToken::JinjaTokenNot) {
return -1;
} else if (type == JinjaToken::JinjaTokenEqual || type == JinjaToken::JinjaTokenNotEqual) {
return 0;
@@ -270,6 +274,8 @@ namespace fastllm {
return 3;
} else if (type == JinjaToken::JinjaTokenDOT) {
return 4;
+} else if (type == JinjaToken::JinjaTokenLSB || type == JinjaToken::JinjaTokenLMB) {
+return -5;
} else {
ErrorInFastLLM("Jinja error: unsupport op: " + std::to_string(type));
return -1;
@@ -348,6 +354,7 @@ namespace fastllm {
tokens[i].type == JinjaToken::JinjaTokenIn ||
tokens[i].type == JinjaToken::JinjaTokenAnd ||
tokens[i].type == JinjaToken::JinjaTokenOr ||
+tokens[i].type == JinjaToken::JinjaTokenNot ||
tokens[i].type == JinjaToken::JinjaTokenFliter) {
while (ops.size() > 0 && GetOpLevel(ops.back().type) > GetOpLevel(tokens[i].type)) {
suffixExp.push_back(ops.back());
@@ -418,6 +425,14 @@ namespace fastllm {
} else {
ErrorInFastLLM("Jinja Error: unsupport filter " + b.stringValue);
}
+} else if (it.type == JinjaToken::JinjaTokenNot) {
+AssertInFastLLM(vars.size() >= 1, "Jinja Error: expression error.");
+JinjaVar a = vars.back();
+if (a.type == JinjaVar::JinjaNone) {
+a = local[a];
+}
+vars.pop_back();
+vars.push_back(a.type == JinjaVar::JinjaNone ? JinjaVar(1) : JinjaVar(!a.BoolValue()));
} else if (it.type == JinjaToken::JinjaTokenAdd ||
it.type == JinjaToken::JinjaTokenSub ||
it.type == JinjaToken::JinjaTokenMul ||
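
The last hunk evaluates `not` on the expression stack: the operand is popped, resolved against the local scope if it is still an unresolved name, and replaced by its negation, with a variable that is still undefined after lookup counting as falsy, so `not undefined` yields true. The precedence values above place `not` (-1) between `and`/`or` (-2) and the comparisons (0), so `not a == b` parses as `not (a == b)`, matching Jinja2. A minimal stack-machine sketch with stand-in types, not fastllm's real API:

```cpp
#include <cassert>
#include <vector>

// Simplified stand-in for fastllm's JinjaVar; the real type has more cases.
struct Var {
    enum Type { None, Int, Array } type = None;
    long long intValue = 0;
    std::vector<Var> arrayValue;
    bool BoolValue() const {
        if (type == Int) return intValue != 0;
        if (type == Array) return !arrayValue.empty();
        return false;  // None (undefined) is falsy
    }
};

// Pop the operand and push its negation; "not undefined" evaluates to true,
// mirroring the JinjaNone check in the new JinjaTokenNot branch above.
void evalNot(std::vector<Var> &vars) {
    Var a = vars.back();
    vars.pop_back();
    Var r;
    r.type = Var::Int;
    r.intValue = (a.type == Var::None) ? 1 : !a.BoolValue();
    vars.push_back(r);
}

int main() {
    std::vector<Var> vars;
    vars.push_back(Var{});              // an undefined variable
    evalNot(vars);
    assert(vars.back().intValue == 1);  // not undefined -> true
    return 0;
}
```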
