Skip to content

Commit

Permalink
Merge pull request #483 from TylunasLi/doc
Browse files: browse the repository at this point in the history
C++支持直接读取Deepseek Coder V1系列HF模型
  • Loading branch information
ztxz16 authored Aug 2, 2024
2 parents 49e45e3 + bd7e4bc commit 3e9a49d
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 20 deletions.
9 changes: 4 additions & 5 deletions docs/models.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,16 @@

| 模型 | 加载后转换 | 离线转换 | 直接读取 |
|-------------------------------------------: |------------|------------|------------|
| deepseek-ai/Deepseek-Coder-1.3B-Instruct | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) | ✔<sup>4</sup><sup>5</sup> |
| deepseek-ai/Deepseek-Coder-6.7B-Instruct | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) | ✔<sup>4</sup><sup>5</sup> |
| deepseek-ai/Deepseek-Coder-7B-Instruct v1.5 | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) | ✔<sup>4</sup> |
| deepseek-ai/deepseek-coder-33b-instruct | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) | ✔<sup>4</sup> |
| deepseek-ai/Deepseek-Coder-1.3B-Instruct | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) ||
| deepseek-ai/Deepseek-Coder-6.7B-Instruct | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) ||
| deepseek-ai/Deepseek-Coder-7B-Instruct v1.5 | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) ||
| deepseek-ai/deepseek-coder-33b-instruct | [✔](llama_cookbook.md#deepseek-coder) | [✔](llama_cookbook.md#deepseek-coder) ||
| deepseek-ai/DeepSeek-V2-Chat ||||
| deepseek-ai/DeepSeek-V2-Lite-Chat ||||
| deepseek-ai/DeepSeek-Coder-V2-Instruct ||||
| deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct ||||

> 注4: Python ftllm用AutoTokenizer而不使用Fastllm Tokenizer可以实现加载,但是C++程序尚不支持加载该模型的Tokenizer。
> 注5: C++端仅支持最早的几个 `tokenizer_config.json` 版本。

### LLaMA类模型
Expand Down
9 changes: 5 additions & 4 deletions include/template.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ namespace fastllm {
};

JinjaVarType type = JinjaNone;
long long intValue;
float floatValue;
long long intValue = 0LL;
float floatValue = 0.0f;
std::string stringValue;
std::vector <JinjaVar> arrayValue;
std::map <std::string, JinjaVar> dictValue;
Expand Down Expand Up @@ -55,7 +55,7 @@ namespace fastllm {
JinjaTokenIn,
JinjaTokenAssign, JinjaTokenNotEqual, JinjaTokenEqual, JinjaTokenAdd, JinjaTokenSub, JinjaTokenMul, JinjaTokenDiv,
JinjaTokenNot, JinjaTokenAnd, JinjaTokenOr,
JinjaTokenFliter
JinjaTokenFliter, JinjaTokenNamespace
};

JinjaToKenType type;
Expand Down Expand Up @@ -96,7 +96,8 @@ namespace fastllm {
{"false", JinjaToken::JinjaToKenType::JinjaTokenBOOL},
{"and", JinjaToken::JinjaToKenType::JinjaTokenAnd},
{"or", JinjaToken::JinjaToKenType::JinjaTokenOr},
{"not", JinjaToken::JinjaToKenType::JinjaTokenNot}
{"not", JinjaToken::JinjaToKenType::JinjaTokenNot},
{"namespace", JinjaToken::JinjaToKenType::JinjaTokenNamespace}
};

// 一个Jinja块
Expand Down
57 changes: 46 additions & 11 deletions src/template.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,20 @@ namespace fastllm {
this->temp = temp;
// 词法解析
int pos = 0;
bool trimNext = false;
for (int i = 0; i < temp.size(); i++) {
if (temp[i] == '{' && i + 1 < temp.size() && (temp[i + 1] == '{' || temp[i + 1] == '%') ) {
blocks.push_back(JinjaBlock(temp.substr(pos, i - pos)));
size_t curEnd = temp[i + 1] == '%' ? temp.find("%}", i + 2) : temp.find("}}", i + 2);
AssertInFastLLM(curEnd != -1,
"Can't find blockend: " + temp.substr(i, std::min(10, (int)temp.size() - i)));
std::string part = temp.substr(pos, i - pos);
if (temp[i + 2] == '-')
part.erase(0, part.find_first_not_of(" \n\r\t"));
if (trimNext)
part.erase(part.find_last_not_of(" \n\r\t") + 1);
blocks.push_back(JinjaBlock(part));
blocks.push_back(temp.substr(i, curEnd + 2 - i));
trimNext = (temp[curEnd - 1] == '-');
pos = curEnd + 2;
i = curEnd + 1;
}
Expand Down Expand Up @@ -314,6 +321,15 @@ namespace fastllm {
}
AssertInFastLLM(ops.size() > 0 && ops.back().type == JinjaToken::JinjaTokenLSB, "Error: barckets doesn't match.");
ops.pop_back();
} else if (tokens[i].type == JinjaToken::JinjaTokenNamespace) {
// 目前仅支持 "变量 = 表达式" 格式
AssertInFastLLM(
tokens.size() - i >= 3 &&
tokens[i + 2].type == JinjaToken::JinjaTokenID &&
tokens[i + 3].type == JinjaToken::JinjaTokenAssign,
"Jinja error: only support format \"(var = expression)\"."
);
ops.push_back(tokens[i]);
} else if (tokens[i].type == JinjaToken::JinjaTokenRMB) {
while (ops.size() > 0 && ops.back().type != JinjaToken::JinjaTokenLMB) {
suffixExp.push_back(ops.back());
Expand Down Expand Up @@ -380,6 +396,15 @@ namespace fastllm {
vars.pop_back();
vars.pop_back();
vars.push_back(a[b]);
} else if (it.type == JinjaToken::JinjaTokenNamespace) {
AssertInFastLLM(vars.size() > 1, "Jinja Error: expression error.");
JinjaVar a = vars[vars.size() - 2], b = vars.back();
if (b.type == JinjaVar::JinjaNone) {
b = local[b];
}
vars.pop_back();
vars.pop_back();
vars.push_back(JinjaVar({{a.stringValue, b}}));
} else if (it.type == JinjaToken::JinjaTokenFliter) {
AssertInFastLLM(vars.size() > 1, "Jinja Error: expression error.");
JinjaVar a = vars[vars.size() - 2], b = vars.back();
Expand Down Expand Up @@ -428,7 +453,7 @@ namespace fastllm {
for (int i = st; i < end; i++) {
JinjaBlock &curBlock = blocks[i];
if (curBlock.type == JinjaBlock::JinjaBlockType::JinjaBlockOriginal) {
ret += JinjaTrim(JinjaVar(curBlock.value)).stringValue;
ret += curBlock.value;
} else if (curBlock.type == JinjaBlock::JinjaBlockType::JinjaBlockVar) {
ret += ComputeExpression(var, curBlock.tokens, 0, curBlock.tokens.size()).DirectValue();
} else if (curBlock.type == JinjaBlock::JinjaBlockFor) {
Expand Down Expand Up @@ -526,15 +551,25 @@ namespace fastllm {
i = endPos;
} else if (curBlock.type == JinjaBlock::JinjaBlockSet) {
// 目前仅支持 "set 变量 = 表达式" 格式
AssertInFastLLM(
curBlock.tokens.size() >= 4 &&
curBlock.tokens[1].type == JinjaToken::JinjaTokenID &&
curBlock.tokens[2].type == JinjaToken::JinjaTokenAssign,
"Jinja error: only support format \"set var = expression\"."
);

std::string iterId = curBlock.tokens[1].value;
var[iterId] = ComputeExpression(var, curBlock.tokens, 3, curBlock.tokens.size());
if (curBlock.tokens.size() >= 4 &&
curBlock.tokens[1].type == JinjaToken::JinjaTokenID &&
curBlock.tokens[2].type == JinjaToken::JinjaTokenAssign) {
std::string iterId = curBlock.tokens[1].value;
var[iterId] = ComputeExpression(var, curBlock.tokens, 3, curBlock.tokens.size());
} else if (curBlock.tokens.size() >= 4 &&
curBlock.tokens[1].type == JinjaToken::JinjaTokenID &&
curBlock.tokens[curBlock.tokens.size() - 2].type == JinjaToken::JinjaTokenAssign) {
JinjaVar value = ComputeExpression(var, curBlock.tokens, curBlock.tokens.size() - 1, curBlock.tokens.size());
JinjaVar key = ComputeExpression(var, curBlock.tokens, 1, curBlock.tokens.size() - 2);
key.type = value.type;
key.intValue = value.intValue;
key.floatValue = value.floatValue;
key.stringValue = value.stringValue;
key.arrayValue = value.arrayValue;
key.dictValue = value.dictValue;
} else {
ErrorInFastLLM("Jinja error: only support format \"set var = expression\".");
}
} else {
ErrorInFastLLM("Jinja usupport block: " + curBlock.value);
}
Expand Down

0 comments on commit 3e9a49d

Please sign in to comment.