From 193fd06d1c69fd32870271f6f3acca262782bb3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=84=E5=AE=87=E6=89=AC?=
Date: Thu, 27 Jun 2024 08:57:39 +0800
Subject: [PATCH] =?UTF-8?q?tokenizer=E6=B2=A1=E6=9C=89chat=5Ftemplate?=
 =?UTF-8?q?=E7=9A=84=E6=97=B6=E5=80=99=E7=94=A8fastllm=E7=94=9F=E6=88=90pr?=
 =?UTF-8?q?ompt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/model.cpp                | 4 +++-
 tools/fastllm_pytools/llm.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/model.cpp b/src/model.cpp
index 1c922b89..c251767b 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -398,7 +398,9 @@ namespace fastllm {
             auto tokenizerConfig = json11::Json::parse(ReadAllFile(tokenizerConfigFile), error);
             model->weight.tokenizer.SetTokenizerConfig(tokenizerConfig);
             std::string tokenizerClass = tokenizerConfig["tokenizer_class"].string_value();
-            if (tokenizerClass == "PreTrainedTokenizerFast" || tokenizerClass == "Qwen2Tokenizer") {
+            if (tokenizerClass == "PreTrainedTokenizerFast"
+                || tokenizerClass == "Qwen2Tokenizer"
+                || tokenizerClass == "BloomTokenizer") {
                 // PreTrainedTokenizerFast
                 std::string tokenizerFile = path + "tokenizer.json";
                 auto tokenizer = json11::Json::parse(ReadAllFile(tokenizerFile), error);
diff --git a/tools/fastllm_pytools/llm.py b/tools/fastllm_pytools/llm.py
index c0b4fe08..c853ae27 100644
--- a/tools/fastllm_pytools/llm.py
+++ b/tools/fastllm_pytools/llm.py
@@ -446,7 +446,7 @@ def get_prompt(self,
         if (not(history)):
             history = [];
         messages = []
-        if (self.hf_tokenizer != None):
+        if (self.hf_tokenizer != None and hasattr(self.hf_tokenizer, "chat_template") and self.hf_tokenizer.chat_template != None and self.hf_tokenizer.chat_template != ""):
             if (self.system_prompt != ""):
                 messages.append({"role": "system", "content": self.system_prompt})
             for his in history: