Skip to content

Commit

Permalink
支持GLM-4-Plus以及Zero思考推理模型
Browse files Browse the repository at this point in the history
  • Loading branch information
Vinlic committed Dec 31, 2024
1 parent 57b042d commit 719e3b6
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 30 deletions.
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
![](https://img.shields.io/github/forks/llm-red-team/glm-free-api.svg)
![](https://img.shields.io/docker/pulls/vinlic/glm-free-api.svg)

支持高速流式输出、支持多轮对话、支持智能体对话、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析,零配置部署,多路token支持,自动清理会话痕迹。
支持GLM-4-Plus高速流式输出、支持多轮对话、支持智能体对话、支持Zero思考推理模型、支持视频生成、支持AI绘图、支持联网搜索、支持长文档解读、支持图像解析,零配置部署,多路token支持,自动清理会话痕迹。

与ChatGPT接口完全兼容。

Expand Down Expand Up @@ -286,8 +286,10 @@ Authorization: Bearer [refresh_token]
请求数据:
```json
{
// 如果使用智能体请填写智能体ID到此处,否则可以乱填
"model": "glm4",
// 默认模型:glm-4-plus
// zero思考推理模型:glm-4-zero / glm-4-think
// 如果使用智能体请填写智能体ID到此处
"model": "glm-4-plus",
// 目前多轮对话基于消息合并实现,某些场景可能导致能力下降且受单轮最大token数限制
// 如果您想获得原生的多轮对话体验,可以传入首轮消息获得的id,来接续上下文
// "conversation_id": "65f6c28546bae1f0fbb532de",
Expand All @@ -307,7 +309,7 @@ Authorization: Bearer [refresh_token]
{
// 如果想获得原生多轮对话体验,此id,你可以传入到下一轮对话的conversation_id来接续上下文
"id": "65f6c28546bae1f0fbb532de",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{
Expand Down Expand Up @@ -432,7 +434,7 @@ Authorization: Bearer [refresh_token]
```json
{
// 如果使用智能体请填写智能体ID到此处,否则可以乱填
"model": "glm4",
"model": "glm-4",
"messages": [
{
"role": "user",
Expand All @@ -459,7 +461,7 @@ Authorization: Bearer [refresh_token]
```json
{
"id": "cnmuo7mcp7f9hjcmihn0",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{
Expand Down
12 changes: 7 additions & 5 deletions README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,10 @@ Authorization: Bearer [refresh_token]
Request data:
```json
{
// Except using the Agent to fill the ID, fill in the model name as you like.
"model": "glm4",
// Default model: glm-4-plus
// Zero thinking/reasoning models: glm-4-zero / glm-4-think
// If using the Agent, fill in the Agent ID here
"model": "glm-4",
// Currently, multi-round conversations are realized based on message merging, which in some scenarios may lead to capacity degradation and is limited by the maximum number of tokens in a single round.
// If you want a native multi-round dialog experience, you can pass in the ids obtained from the last round of messages to pick up the context
// "conversation_id": "65f6c28546bae1f0fbb532de",
Expand All @@ -309,7 +311,7 @@ Response data:
```json
{
"id": "65f6c28546bae1f0fbb532de",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{
Expand Down Expand Up @@ -434,7 +436,7 @@ Request data:
```json
{
// If using an Agent, fill in the Agent ID here; otherwise any value works
"model": "glm4",
"model": "glm-4",
"messages": [
{
"role": "user",
Expand All @@ -461,7 +463,7 @@ Response data:
```json
{
"id": "cnmuo7mcp7f9hjcmihn0",
"model": "glm4",
"model": "glm-4",
"object": "chat.completion",
"choices": [
{
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "glm-free-api",
"version": "0.0.33",
"version": "0.0.34",
"description": "GLM Free API Server",
"type": "module",
"main": "dist/index.js",
Expand Down
95 changes: 80 additions & 15 deletions src/api/controllers/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import util from "@/lib/util.ts";
const MODEL_NAME = "glm";
// 默认的智能体ID,GLM4
const DEFAULT_ASSISTANT_ID = "65940acff94777010aa6b796";
// zero推理模型智能体ID
const ZERO_ASSISTANT_ID = "676411c38945bbc58a905d31";
// access_token有效期
const ACCESS_TOKEN_EXPIRES = 3600;
// 最大重试次数
Expand Down Expand Up @@ -165,13 +167,13 @@ async function removeConversation(
*
* @param messages 参考gpt系列消息格式,多轮对话请完整提供上下文
* @param refreshToken 用于刷新access_token的refresh_token
* @param assistantId 智能体ID,默认使用GLM4原版
* @param model 智能体ID,默认使用GLM4原版
* @param retryCount 重试次数
*/
async function createCompletion(
messages: any[],
refreshToken: string,
assistantId = DEFAULT_ASSISTANT_ID,
model = MODEL_NAME,
refConvId = "",
retryCount = 0
) {
Expand All @@ -189,6 +191,13 @@ async function createCompletion(
// 如果引用对话ID不正确则重置引用
if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";

let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;

if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
assistantId = ZERO_ASSISTANT_ID;
logger.info('使用思考模型');
}

// 请求流
const token = await acquireToken(refreshToken);
const result = await axios.post(
Expand All @@ -200,8 +209,11 @@ async function createCompletion(
meta_data: {
channel: "",
draft_id: "",
if_plus_model: true,
input_question_type: "xxxx",
is_test: false,
platform: "pc",
quote_log_id: ""
},
},
{
Expand Down Expand Up @@ -231,7 +243,7 @@ async function createCompletion(

const streamStartTime = util.timestamp();
// 接收流为输出文本
const answer = await receiveStream(result.data);
const answer = await receiveStream(model, result.data);
logger.success(
`Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
);
Expand All @@ -251,7 +263,7 @@ async function createCompletion(
return createCompletion(
messages,
refreshToken,
assistantId,
model,
refConvId,
retryCount + 1
);
Expand All @@ -266,13 +278,13 @@ async function createCompletion(
*
* @param messages 参考gpt系列消息格式,多轮对话请完整提供上下文
* @param refreshToken 用于刷新access_token的refresh_token
* @param assistantId 智能体ID,默认使用GLM4原版
* @param model 智能体ID,默认使用GLM4原版
* @param retryCount 重试次数
*/
async function createCompletionStream(
messages: any[],
refreshToken: string,
assistantId = DEFAULT_ASSISTANT_ID,
model = MODEL_NAME,
refConvId = "",
retryCount = 0
) {
Expand All @@ -290,6 +302,13 @@ async function createCompletionStream(
// 如果引用对话ID不正确则重置引用
if (!/[0-9a-zA-Z]{24}/.test(refConvId)) refConvId = "";

let assistantId = /^[a-z0-9]{24,}$/.test(model) ? model : undefined;

if(model.indexOf('think') != -1 || model.indexOf('zero') != -1) {
assistantId = ZERO_ASSISTANT_ID;
logger.info('使用思考模型');
}

// 请求流
const token = await acquireToken(refreshToken);
const result = await axios.post(
Expand All @@ -301,8 +320,11 @@ async function createCompletionStream(
meta_data: {
channel: "",
draft_id: "",
if_plus_model: true,
input_question_type: "xxxx",
is_test: false,
platform: "pc",
quote_log_id: ""
},
},
{
Expand Down Expand Up @@ -354,7 +376,7 @@ async function createCompletionStream(

const streamStartTime = util.timestamp();
// 创建转换流将消息格式转换为gpt兼容格式
return createTransStream(result.data, (convId: string) => {
return createTransStream(model, result.data, (convId: string) => {
logger.success(
`Stream has completed transfer ${util.timestamp() - streamStartTime}ms`
);
Expand All @@ -372,7 +394,7 @@ async function createCompletionStream(
return createCompletionStream(
messages,
refreshToken,
assistantId,
model,
refConvId,
retryCount + 1
);
Expand Down Expand Up @@ -407,8 +429,11 @@ async function generateImages(
meta_data: {
channel: "",
draft_id: "",
if_plus_model: true,
input_question_type: "xxxx",
is_test: false,
platform: "pc",
quote_log_id: ""
},
},
{
Expand Down Expand Up @@ -904,14 +929,15 @@ function checkResult(result: AxiosResponse, refreshToken: string) {
/**
* 从流接收完整的消息内容
*
* @param model 模型
* @param stream 消息流
*/
async function receiveStream(stream: any): Promise<any> {
async function receiveStream(model: string, stream: any): Promise<any> {
return new Promise((resolve, reject) => {
// 消息初始化
const data = {
id: "",
model: MODEL_NAME,
model,
object: "chat.completion",
choices: [
{
Expand All @@ -923,13 +949,16 @@ async function receiveStream(stream: any): Promise<any> {
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
created: util.unixTimestamp(),
};
const isSilentModel = model.indexOf('silent') != -1;
let thinkingText = "";
let toolCall = false;
let codeGenerating = false;
let textChunkLength = 0;
let codeTemp = "";
let lastExecutionOutput = "";
let textOffset = 0;
let refContent = "";
logger.info(`是否静默模型: ${isSilentModel}`);
const parser = createParser((event) => {
try {
if (event.type !== "event") return;
Expand Down Expand Up @@ -957,6 +986,7 @@ async function receiveStream(stream: any): Promise<any> {
textChunkLength = 0;
innerStr += "\n";
}

if (type == "text") {
if (toolCall) {
innerStr += "\n";
Expand All @@ -965,11 +995,20 @@ async function receiveStream(stream: any): Promise<any> {
}
if (partStatus == "finish") textChunkLength = text.length;
return innerStr + text;
} else if (
} else if (type == "text_thinking" && !isSilentModel) {
if (toolCall) {
innerStr += "\n";
textOffset++;
toolCall = false;
}
thinkingText = text;
return innerStr;
}else if (
type == "quote_result" &&
status == "finish" &&
meta_data &&
_.isArray(meta_data.metadata_list)
_.isArray(meta_data.metadata_list) &&
!isSilentModel
) {
refContent = meta_data.metadata_list.reduce((meta, v) => {
return meta + `${v.title} - ${v.url}\n`;
Expand Down Expand Up @@ -1032,6 +1071,8 @@ async function receiveStream(stream: any): Promise<any> {
);
data.choices[0].message.content += chunk;
} else {
if(thinkingText)
data.choices[0].message.content = `[思考开始]\n${thinkingText}[思考结束]\n\n${data.choices[0].message.content}`;
data.choices[0].message.content =
data.choices[0].message.content.replace(
/【\d+†(来源|源|source)】/g,
Expand Down Expand Up @@ -1059,26 +1100,30 @@ async function receiveStream(stream: any): Promise<any> {
*
* 将流格式转换为gpt兼容流格式
*
* @param model 模型
* @param stream 消息流
* @param endCallback 传输结束回调
*/
function createTransStream(stream: any, endCallback?: Function) {
function createTransStream(model: string, stream: any, endCallback?: Function) {
// 消息创建时间
const created = util.unixTimestamp();
// 创建转换流
const transStream = new PassThrough();
const isSilentModel = model.indexOf('silent') != -1;
let content = "";
let thinking = false;
let toolCall = false;
let codeGenerating = false;
let textChunkLength = 0;
let thinkingText = "";
let codeTemp = "";
let lastExecutionOutput = "";
let textOffset = 0;
!transStream.closed &&
transStream.write(
`data: ${JSON.stringify({
id: "",
model: MODEL_NAME,
model,
object: "chat.completion.chunk",
choices: [
{
Expand Down Expand Up @@ -1116,18 +1161,38 @@ function createTransStream(stream: any, endCallback?: Function) {
innerStr += "\n";
}
if (type == "text") {
if(thinking) {
innerStr += "[思考结束]\n\n"
textOffset = thinkingText.length + 8;
thinking = false;
}
if (toolCall) {
innerStr += "\n";
textOffset++;
toolCall = false;
}
if (partStatus == "finish") textChunkLength = text.length;
return innerStr + text;
} else if (type == "text_thinking" && !isSilentModel) {
if(!thinking) {
innerStr += "[思考开始]\n";
textOffset = 7;
thinking = true;
}
if (toolCall) {
innerStr += "\n";
textOffset++;
toolCall = false;
}
if (partStatus == "finish") textChunkLength = text.length;
thinkingText += text.substring(thinkingText.length, text.length);
return innerStr + text;
} else if (
type == "quote_result" &&
status == "finish" &&
meta_data &&
_.isArray(meta_data.metadata_list)
_.isArray(meta_data.metadata_list) &&
!isSilentModel
) {
const searchText =
meta_data.metadata_list.reduce(
Expand Down
Loading

0 comments on commit 719e3b6

Please sign in to comment.