From de88d050384c5b5abb267090e8f5a535aba44764 Mon Sep 17 00:00:00 2001
From: cgli
Date: Wed, 3 Jul 2024 18:37:04 +0800
Subject: [PATCH 1/3] Fix the Windows build and direct HF model loading issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 example/Win32Demo/fastllm-gpu.vcxproj         |  4 ++++
 example/Win32Demo/fastllm-gpu.vcxproj.filters | 12 ++++++++++++
 example/Win32Demo/fastllm.vcxproj             |  4 ++++
 example/Win32Demo/fastllm.vcxproj.filters     | 12 ++++++++++++
 src/model.cpp                                 |  4 ++--
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/example/Win32Demo/fastllm-gpu.vcxproj b/example/Win32Demo/fastllm-gpu.vcxproj
index ee8c0c0b..46ebf855 100644
--- a/example/Win32Demo/fastllm-gpu.vcxproj
+++ b/example/Win32Demo/fastllm-gpu.vcxproj
@@ -201,6 +201,7 @@
+
@@ -208,6 +209,7 @@
+
@@ -227,12 +229,14 @@
+
+
diff --git a/example/Win32Demo/fastllm-gpu.vcxproj.filters b/example/Win32Demo/fastllm-gpu.vcxproj.filters
index f8326fbf..8b8a5a10 100644
--- a/example/Win32Demo/fastllm-gpu.vcxproj.filters
+++ b/example/Win32Demo/fastllm-gpu.vcxproj.filters
@@ -57,6 +57,9 @@
       头文件
+
+      头文件
+
       头文件
@@ -81,6 +84,9 @@
       头文件\models
+
+      头文件\models
+
       头文件\models
@@ -134,6 +140,9 @@
       源文件
+
+      源文件
+
       源文件
@@ -155,6 +164,9 @@
       源文件\models
+
+      源文件\models
+
       源文件\models
diff --git a/example/Win32Demo/fastllm.vcxproj b/example/Win32Demo/fastllm.vcxproj
index d684c814..a1bd9596 100644
--- a/example/Win32Demo/fastllm.vcxproj
+++ b/example/Win32Demo/fastllm.vcxproj
@@ -177,6 +177,7 @@
+
@@ -184,6 +185,7 @@
+
@@ -201,12 +203,14 @@
+
+
diff --git a/example/Win32Demo/fastllm.vcxproj.filters b/example/Win32Demo/fastllm.vcxproj.filters
index afe15976..94ea3a95 100644
--- a/example/Win32Demo/fastllm.vcxproj.filters
+++ b/example/Win32Demo/fastllm.vcxproj.filters
@@ -57,6 +57,9 @@
       头文件
+
+      头文件
+
       头文件
@@ -81,6 +84,9 @@
       头文件\models
+
+      头文件\models
+
       头文件\models
@@ -128,6 +134,9 @@
       源文件
+
+      源文件
+
       源文件
@@ -149,6 +158,9 @@
       源文件\models
+
+      源文件\models
+
       源文件\models
diff --git a/src/model.cpp b/src/model.cpp
index f15f711f..ad68e4d9 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -262,9 +262,9 @@ namespace fastllm {
             ClearBuffer();
             buffer = new uint8_t[len * unitSize];
-            FILE *fi = fopen(this->fileName.c_str(), "r");
+            FILE *fi = fopen(this->fileName.c_str(), "rb");
             int ret;
-#if defined(_WIN32) or defined(_WIN64)
+#if defined(_WIN32) || defined(_WIN64)
             _fseeki64(fi, this->data_offsets[0], 0);
 #else
             fseek(fi, this->data_offsets[0], 0);

From 2793c4054d2e4a303f28da8ec4f79d7ef084e9e3 Mon Sep 17 00:00:00 2001
From: cgli
Date: Wed, 3 Jul 2024 20:44:05 +0800
Subject: [PATCH 2/3] Support saving llama-family models loaded directly from
 safetensors in the flm format, and loading them for inference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/devices/cpu/alivethreadpool.h | 2 +-
 src/fastllm.cpp                       | 2 ++
 src/model.cpp                         | 7 ++++++-
 src/models/llama.cpp                  | 6 +++++-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/devices/cpu/alivethreadpool.h b/include/devices/cpu/alivethreadpool.h
index f64132fc..2c963961 100644
--- a/include/devices/cpu/alivethreadpool.h
+++ b/include/devices/cpu/alivethreadpool.h
@@ -62,7 +62,7 @@ namespace fastllm {
             auto duration = std::chrono::duration_cast<std::chrono::microseconds> (std::chrono::system_clock::now() - lastRunTime);
             double gap = double(duration.count()) * std::chrono::microseconds::period::num / std::chrono::microseconds::period::den;
             if (gap > 3) {
-                std::this_thread::sleep_for(std::chrono::seconds(0));
+                std::this_thread::sleep_for(std::chrono::microseconds(2));
             }
         }
     }
diff --git a/src/fastllm.cpp b/src/fastllm.cpp
index 5b2c6a27..eab1d29e 100644
--- a/src/fastllm.cpp
+++ b/src/fastllm.cpp
@@ -1976,6 +1976,8 @@ namespace fastllm {
             }
             tokenizer.SetSpecialTokens(specialTokens);
         }
+        if (this->dicts.find("chat_template") != this->dicts.end())
+            tokenizer.chatTemplate = this->dicts["chat_template"];
         int len = buffer.ReadInt();
         for (int i = 0; i < len; i++) {
diff --git a/src/model.cpp b/src/model.cpp
index ad68e4d9..8fbc5ef9 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -424,6 +424,8 @@ namespace fastllm {
         std::string tokenizerConfigFile = path + "tokenizer_config.json";
         auto tokenizerConfig = json11::Json::parse(ReadAllFile(tokenizerConfigFile), error);
         model->weight.tokenizer.SetTokenizerConfig(tokenizerConfig);
+        if (!model->weight.tokenizer.chatTemplate.empty() && model->weight.dicts.find("chat_template") == model->weight.dicts.end())
+            model->weight.AddDict("chat_template", model->weight.tokenizer.chatTemplate);
         std::string tokenizerClass = tokenizerConfig["tokenizer_class"].string_value();
         if (tokenizerClass == "PreTrainedTokenizerFast"
             || tokenizerClass == "Qwen2Tokenizer"
@@ -439,10 +441,13 @@ namespace fastllm {
                 spTokens[it["content"].string_value()] = it["id"].int_value();
             }
             model->weight.tokenizer.SetSpecialTokens(spTokens);
+            if (!spTokens.empty())
+                model->weight.AddDict("tokenizer_has_special_tokens", "1");
             if (!tokenizer["decoder"].is_null() && !tokenizer["decoder"]["type"].is_null()
                 && tokenizer["decoder"]["type"].string_value() == "ByteLevel") {
                 model->weight.tokenizer.byteAsChar = true;
+                model->weight.AddDict("tokenizer_byte_as_char", "True");
             }
         } else if (tokenizerClass == "ChatGLM4Tokenizer") {
             // GLM4御用的分词
@@ -515,7 +520,7 @@ namespace fastllm {
         auto config = json11::Json::parse(ReadAllFile(configFile), error);
         basellm *model = CreateModelWithType(config["model_type"].string_value());
         for (auto &it : config.object_items()) {
-            model->weight.AddDict(it.first, it.second.dump().c_str());
+            model->weight.AddDict(it.first, it.second.is_string() ? it.second.string_value() : it.second.dump());
         }
         // 设置eos_token_id
         if (config["eos_token_id"].is_array()) {
diff --git a/src/models/llama.cpp b/src/models/llama.cpp
index 6e2724ce..18c7cf26 100644
--- a/src/models/llama.cpp
+++ b/src/models/llama.cpp
@@ -151,7 +151,7 @@ namespace fastllm {
             std::string mergeQkvWeightName = "model.layers." + std::to_string(i) + ".self_attn.mergeqkv.weight";
             std::string mergeQkvBiasName = "model.layers." + std::to_string(i) + ".self_attn.mergeqkv.bias";
-            if (weight.weight.find(qkvWeightName) != weight.weight.end()) {
+            if (weight.weight.find(qkvWeightName) != weight.weight.end() || weight.weight.find(mergeQkvWeightName) != weight.weight.end()) {
                 mergeQKV = true;
                 break;
             } else {
@@ -214,6 +214,10 @@
             std::string w3WeightName = "model.layers." + std::to_string(i) + ".mlp.up_proj.weight";
             std::string swigluWeightName = "model.layers." + std::to_string(i) + ".mlp.gateup_proj.weight";
+            if (weight.weight.find(swigluWeightName) != weight.weight.end()) {
+                mergeQKV = true;
+                break;
+            }
             Data &w1 = weight.weight[w1WeightName], &w3 = weight.weight[w3WeightName];
             if ((w1.dataType == DataType::INT4_GROUP && w1.dims[1] % w1.groupCnt != 0) ||
                 (w3.dataType == DataType::INT4_GROUP && w3.dims[1] % w3.groupCnt != 0)) {

From e787aa64b7ff7824b5db8e30e760d1f83da259c1 Mon Sep 17 00:00:00 2001
From: cgli
Date: Sun, 7 Jul 2024 15:31:43 +0800
Subject: [PATCH 3/3] Save directly loaded glm4-family models in the flm
 format (#465)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/model.cpp                      | 31 ++++++++++++++++++++++++------
 src/models/chatglm.cpp             |  8 ++++++++
 tools/fastllm_pytools/hf_model.py  |  4 ++--
 tools/fastllm_pytools/torch2flm.py |  4 ++--
 4 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/src/model.cpp b/src/model.cpp
index 8fbc5ef9..2f1e55a6 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -66,11 +66,21 @@ namespace fastllm {
     void basellm::InitParams() {
         if (this->weight.dicts.find("bos_token_id") != this->weight.dicts.end()) {
-            if(this->weight.dicts["bos_token_id"]!="None"){
+            if (this->weight.dicts["bos_token_id"]!="None") {
                 this->bos_token_id = atoi(this->weight.dicts["bos_token_id"].c_str());
             }
-            if(this->weight.dicts["eos_token_id"]!="None"){
-                this->eos_token_id = atoi(this->weight.dicts["eos_token_id"].c_str());
+        }
+        if (this->weight.dicts.find("eos_token_id") != this->weight.dicts.end()) {
+            if (this->weight.dicts["eos_token_id"]!="None") {
+                if (this->weight.dicts["eos_token_id"][0] == '[' && this->eos_token_ids.empty()) {
+                    std::string error;
+                    json11::Json ids = json11::Json::parse(this->weight.dicts["eos_token_id"], error);
+                    for (auto &it : ids.array_items()) {
+                        this->eos_token_ids.insert(it.int_value());
+                    }
+                } else {
+                    this->eos_token_id = atoi(this->weight.dicts["eos_token_id"].c_str());
+                }
             }
         }
         if (this->weight.dicts.find("im_start_id") != this->weight.dicts.end()) {
@@ -127,6 +137,16 @@
     }
 
     void basellm::SaveModel(const std::string &fileName) {
+        if (this->weight.tokenizer.chatTemplate.empty()) {
+            if (this->weight.dicts.find("pre_prompt") == this->weight.dicts.end())
+                this->weight.dicts["pre_prompt"] = pre_prompt;
+            if (this->weight.dicts.find("user_role") == this->weight.dicts.end())
+                this->weight.dicts["user_role"] = user_role;
+            if (this->weight.dicts.find("bot_role") == this->weight.dicts.end())
+                this->weight.dicts["bot_role"] = bot_role;
+            if (this->weight.dicts.find("history_sep") == this->weight.dicts.end())
+                this->weight.dicts["history_sep"] = history_sep;
+        }
         this->weight.SaveLowBitModel(fileName, 0);
     }
@@ -451,7 +471,6 @@ namespace fastllm {
             }
         } else if (tokenizerClass == "ChatGLM4Tokenizer") {
             // GLM4御用的分词
-            model->bot_role = " ";
             std::vector <std::string> lines, line;
             SplitString(ReadAllFile(path + "tokenizer.model"), {'\r', '\n'}, lines);
             for (int i = 0; i < lines.size(); i++) {
@@ -463,8 +482,8 @@
                 spTokens[it.second["content"].string_value()] = atoi(it.first.c_str());
             }
             model->weight.tokenizer.SetSpecialTokens(spTokens);
-            ((ChatGLMModel*)model)->gmask_token_id = model->weight.tokenizer.GetTokenId("[gMASK]");
-            ((ChatGLMModel*)model)->bos_token_id = model->weight.tokenizer.GetTokenId("<sop>");
+            model->weight.AddDict("tokenizer_has_special_tokens", "1");
+            model->weight.AddDict("tokenizer_class", tokenizerClass);
             ((ChatGLMModel*)model)->tokenizerClass = tokenizerClass;
 
             // ChatGLM采用拼接token的方法,需要强行指定分割词的TokenID
diff --git a/src/models/chatglm.cpp b/src/models/chatglm.cpp
index 7f786b3e..a6e0e8b1 100644
--- a/src/models/chatglm.cpp
+++ b/src/models/chatglm.cpp
@@ -80,6 +80,9 @@ namespace fastllm {
     void ChatGLMModel::InitParams() {
         basellm::InitParams();
+        if (this->weight.dicts.find("tokenizer_class") != this->weight.dicts.end()) {
+            this->tokenizerClass = this->weight.dicts["tokenizer_class"];
+        }
         if (GetVersion() == 1) {
             if (this->weight.dicts.find("gmask_token_id") != this->weight.dicts.end()) {
                 this->gmask_token_id = atoi(this->weight.dicts["gmask_token_id"].c_str());
@@ -97,6 +100,11 @@
         if (this->weight.dicts.find("rope_ratio") != this->weight.dicts.end()) {
             UpdateRotaryPosEmb(atof(this->weight.dicts["rope_ratio"].c_str()));
         }
+        if (this->tokenizerClass == "ChatGLM4Tokenizer") {
+            this->gmask_token_id = this->weight.tokenizer.GetTokenId("[gMASK]");
+            this->bos_token_id = this->weight.tokenizer.GetTokenId("<sop>");
+            this->weight.tokenizer.type = Tokenizer::TokenizerType::QWEN;
+        }
     }
 
     int ChatGLMModel::Forward(const fastllm::Data &inputIds, const fastllm::Data &attentionMask,
diff --git a/tools/fastllm_pytools/hf_model.py b/tools/fastllm_pytools/hf_model.py
index 3fc2b317..37bbcfb8 100644
--- a/tools/fastllm_pytools/hf_model.py
+++ b/tools/fastllm_pytools/hf_model.py
@@ -94,11 +94,11 @@ def create(model,
         modelInfo["history_sep"] = "";
     if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "name") and tokenizer.name == "GLM4Tokenizer"):
         # glm-4-chat
-        modelInfo["pre_prompt"] = "[gMASK]<sop>";
+        modelInfo["pre_prompt"] = "";
         modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.convert_tokens_to_ids("<|user|>")) + ">\n");
         modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.convert_tokens_to_ids("<|assistant|>")) + ">");
         modelInfo["history_sep"] = "";
-        modelInfo["eos_token_id"] = "151336"
+        modelInfo["tokenizer_class"] = tokenizer.name;
     if "rope_scaling" in modelInfo and isinstance(modelInfo["rope_scaling"], builtins.dict):
         rope_scaling = modelInfo.pop("rope_scaling")
         modelInfo["rope_scaling.type"] = rope_scaling["type"]
diff --git a/tools/fastllm_pytools/torch2flm.py b/tools/fastllm_pytools/torch2flm.py
index 6ef80111..523fe6b5 100644
--- a/tools/fastllm_pytools/torch2flm.py
+++ b/tools/fastllm_pytools/torch2flm.py
@@ -179,11 +179,11 @@ def tofile(exportPath,
         modelInfo["history_sep"] = "";
     if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "name") and tokenizer.name == "GLM4Tokenizer"):
         # glm-4-chat
-        modelInfo["pre_prompt"] = "[gMASK]<sop>";
+        modelInfo["pre_prompt"] = "";
         modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.convert_tokens_to_ids("<|user|>")) + ">\n");
         modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.convert_tokens_to_ids("<|assistant|>")) + ">");
         modelInfo["history_sep"] = "";
-        modelInfo["eos_token_id"] = "151336"
+        modelInfo["tokenizer_class"] = tokenizer.name;
     if "rope_scaling" in modelInfo and isinstance(modelInfo["rope_scaling"], builtins.dict):
         rope_scaling = modelInfo.pop("rope_scaling")
         modelInfo["rope_scaling.type"] = rope_scaling["type"]