From a41a04d7c3b15d68ba9e1314e6843cb7f69aa1c4 Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 09:03:06 +0100
Subject: [PATCH 1/6] remove old messages from chat history when queue limit reached

---
 backend/src/openai.ts | 47 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index efa44fd06..94b2e3cd1 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -26,6 +26,7 @@ import {
   FunctionAskQuestionParams,
   FunctionSendEmailParams,
 } from "./models/openai";
+import { get_encoding } from "@dqbd/tiktoken";
 
 // OpenAI config
 let config: Configuration | null = null;
@@ -294,6 +295,11 @@ async function chatGptChatCompletion(
     functions: chatGptFunctions,
   });
 
+  console.debug(
+    "chat completion. token info: ",
+    JSON.stringify(chat_completion.data.usage)
+  );
+
   // get the reply
   return chat_completion.data.choices[0].message ?? null;
 }
@@ -302,8 +308,29 @@
 // take only the completions to send to GPT
 function getChatCompletionsFromHistory(
   chatHistory: ChatHistoryMessage[]
 ): ChatCompletionRequestMessage[] {
+  // limit the number of tokens sent to GPT
+  const tokenLimit = 100;
+  let currentTokens = 0;
+
+  // reverse chat history
+  const reducedChatHistory = chatHistory.reverse().filter((message) => {
+    const totalTokens = currentTokens + (message.numTokens ?? 0);
+    if (totalTokens <= tokenLimit) {
+      currentTokens = totalTokens;
+      console.debug("current tokens: ", currentTokens);
+      return true;
+    } else {
+      return false;
+    }
+  });
+
+  // reduce to only the completions
+  console.debug("Chat history: to reduce ", chatHistory.length);
+  console.debug("Reduced chat history: to show ", reducedChatHistory.length);
+  console.log(reducedChatHistory);
+
   const completions: ChatCompletionRequestMessage[] =
-    chatHistory.length > 0
+    reducedChatHistory.reverse().length > 0
       ? (chatHistory
           .filter((message) => message.completion !== null)
           .map(
             // we know the completion is not null here
             (message) => message.completion
           ) as ChatCompletionRequestMessage[])
       : [];
+
   return completions;
 }
@@ -319,10 +347,27 @@ function pushCompletionToHistory(
   completion: ChatCompletionRequestMessage,
   messageType: CHAT_MESSAGE_TYPE
 ) {
+  // limit the length of the chat history
+  const maxMessageLength = 1000;
+
+  // gpt-4 and 3.5 models use cl100k_base encoding
+  const encoding = get_encoding("cl100k_base");
+
   if (messageType !== CHAT_MESSAGE_TYPE.BOT_BLOCKED) {
+    // remove the oldest message, not including system role message
+    if (chatHistory.length >= maxMessageLength) {
+      if (chatHistory[0].completion?.role !== "system") {
+        chatHistory.shift();
+      } else {
+        chatHistory.splice(1, 1);
+      }
+    }
     chatHistory.push({
       completion: completion,
       chatMessageType: messageType,
+      numTokens: completion.content
+        ? encoding.encode(completion.content).length
+        : null,
     });
   } else {
     // do not add the bots reply which was subsequently blocked

From 65c327cac26ebaebdf400f2143fff43de8cb4eea Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 16:26:18 +0100
Subject: [PATCH 2/6] filter chat history based on max tokens

---
 backend/package-lock.json        |   6 +
 backend/package.json             |   1 +
 backend/src/models/chat.ts       |   1 +
 backend/src/openai.ts            |  96 ++++++++++++----
 backend/test/unit/openai.test.ts | 187 ++++++++++++++++++++++++++++++-
 5 files changed, 269 insertions(+), 22 deletions(-)

diff --git a/backend/package-lock.json b/backend/package-lock.json
index 7544869bd..90ed72a6e 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -5,6 +5,7 @@
   "packages": {
     "": {
       "dependencies": {
+        "@dqbd/tiktoken": "^1.0.7",
         "cors": "^2.8.5",
         "d3-dsv": "^2.0.0",
         "dotenv": "^16.3.1",
@@ -809,6 +810,11 @@
         "@jridgewell/sourcemap-codec": "^1.4.10"
       }
     },
+    "node_modules/@dqbd/tiktoken": {
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.7.tgz",
+      "integrity": "sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw=="
+    },
     "node_modules/@eslint-community/eslint-utils": {
       "version": "4.4.0",
       "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
diff --git a/backend/package.json b/backend/package.json
index e5b072bd6..7fa4e5b30 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -1,5 +1,6 @@
 {
   "dependencies": {
+    "@dqbd/tiktoken": "^1.0.7",
     "cors": "^2.8.5",
     "d3-dsv": "^2.0.0",
     "dotenv": "^16.3.1",
diff --git a/backend/src/models/chat.ts b/backend/src/models/chat.ts
index 0ce3cd045..66cce5fdb 100644
--- a/backend/src/models/chat.ts
+++ b/backend/src/models/chat.ts
@@ -59,6 +59,7 @@ interface ChatHttpResponse {
 interface ChatHistoryMessage {
   completion: ChatCompletionRequestMessage | null;
   chatMessageType: CHAT_MESSAGE_TYPE;
+  numTokens?: number | null;
   infoMessage?: string | null;
 }
diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index 94b2e3cd1..a523b8aea 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -53,6 +53,7 @@ const chatGptFunctions = [
       },
       confirmed: {
         type: "boolean",
+        default: "false",
         description:
           "whether the user has confirmed the email is correct before sending",
       },
@@ -304,34 +305,78 @@ async function chatGptChatCompletion(
   return chat_completion.data.choices[0].message ?? null;
 }
 
+function countChatHistoryTokens(chatHistory: ChatHistoryMessage[]) {
+  let sumTokens = 0;
+  chatHistory.forEach((message) => {
+    if (message.numTokens) {
+      sumTokens += message.numTokens;
+    }
+  });
+  return sumTokens;
+}
+
+// take only the chat history to send to GPT that is within the max tokens
+function filterChatHistoryByMaxTokens(
+  list: ChatHistoryMessage[],
+  maxNumTokens: number
+): ChatHistoryMessage[] {
+  let sumTokens = 0;
+  const filteredList: ChatHistoryMessage[] = [];
+
+  // reverse list to add from most recent
+  const reverseList = list.slice().reverse();
+
+  // always add the most recent message to start of list
+  filteredList.push(reverseList[0]);
+  sumTokens += reverseList[0].numTokens ?? 0;
+
+  // if the first message is a system role add it to list
+  if (list[0].completion?.role === "system") {
+    sumTokens += list[0].numTokens ?? 0;
+    filteredList.push(list[0]);
+  }
+
+  // add elements after first message until max tokens reached
+  for (let i = 1; i < reverseList.length; i++) {
+    const element = reverseList[i];
+    if (element.completion && element.numTokens) {
+      // if we reach end and system role is there skip as it's already been added
+      if (element.completion.role === "system") {
+        continue;
+      }
+      if (sumTokens + element.numTokens <= maxNumTokens) {
+        filteredList.splice(i, 0, element);
+        sumTokens += element.numTokens;
+      } else {
+        console.debug("max tokens reached on element = ", element);
+        break;
+      }
+    }
+  }
+  return filteredList.reverse();
+}
+
 // take only the completions to send to GPT
 function getChatCompletionsFromHistory(
   chatHistory: ChatHistoryMessage[]
 ): ChatCompletionRequestMessage[] {
   // limit the number of tokens sent to GPT
-  const tokenLimit = 100;
-  let currentTokens = 0;
-
-  // reverse chat history
-  const reducedChatHistory = chatHistory.reverse().filter((message) => {
-    const totalTokens = currentTokens + (message.numTokens ?? 0);
-    if (totalTokens <= tokenLimit) {
-      currentTokens = totalTokens;
-      console.debug("current tokens: ", currentTokens);
-      return true;
-    } else {
-      return false;
-    }
-  });
+  const maxTokens = 500;
+  const reducedChatHistory: ChatHistoryMessage[] = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  console.debug(
+    "number of tokens in chat history",
+    countChatHistoryTokens(chatHistory)
+  );
 
-  // reduce to only the completions
-  console.debug("Chat history: to reduce ", chatHistory.length);
-  console.debug("Reduced chat history: to show ", reducedChatHistory.length);
+  console.log("reduced chat history: ");
   console.log(reducedChatHistory);
 
   const completions: ChatCompletionRequestMessage[] =
-    reducedChatHistory.reverse().length > 0
-      ? (chatHistory
+    reducedChatHistory.length > 0
+      ? (reducedChatHistory
           .filter((message) => message.completion !== null)
           .map(
             // we know the completion is not null here
             (message) => message.completion
           ) as ChatCompletionRequestMessage[])
       : [];
 
+  console.debug(
+    "number of tokens in reduced chat history",
+    countChatHistoryTokens(reducedChatHistory)
+  );
+
   return completions;
 }
@@ -504,4 +554,10 @@ async function chatGptSendMessage(
   }
 }
 
-export { chatGptSendMessage, setOpenAiApiKey, validateApiKey, setGptModel };
+export {
+  chatGptSendMessage,
+  filterChatHistoryByMaxTokens,
+  setOpenAiApiKey,
+  validateApiKey,
+  setGptModel,
+};
diff --git a/backend/test/unit/openai.test.ts b/backend/test/unit/openai.test.ts
index e02709b5d..a71539736 100644
--- a/backend/test/unit/openai.test.ts
+++ b/backend/test/unit/openai.test.ts
@@ -1,7 +1,15 @@
 import { OpenAIApi } from "openai";
-import { validateApiKey, setOpenAiApiKey } from "../../src/openai";
+import {
+  validateApiKey,
+  setOpenAiApiKey,
+  filterChatHistoryByMaxTokens,
+} from "../../src/openai";
 import { initQAModel } from "../../src/langchain";
-import { CHAT_MODELS } from "../../src/models/chat";
+import {
+  CHAT_MESSAGE_TYPE,
+  CHAT_MODELS,
+  ChatHistoryMessage,
+} from "../../src/models/chat";
 
 // Define a mock implementation for the createChatCompletion method
 const mockCreateChatCompletion = jest.fn();
@@ -72,6 +80,181 @@ test("GIVEN an invalid API key WHEN calling setOpenAiApiKey THEN it should set t
   expect(initQAModel).not.toHaveBeenCalled();
 });
 
+test("GIVEN chat history exceeds max token number WHEN applying filter THEN it should return the filtered chat history", () => {
+  const maxTokens = 50;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "user",
+        content: "Hello, my name is Bob.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+  // expect that the first message is discounted
+  const expectedFilteredChatHistory = [
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  expect(filteredChatHistory).toEqual(expectedFilteredChatHistory);
+});
+
+test("GIVEN chat history does not exceed max token number WHEN applying filter THEN it should return the original chat history", () => {
+  const maxTokens = 1000;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "user",
+        content: "Hello, my name is Bob.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  expect(filteredChatHistory).toEqual(chatHistory);
+});
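// Editor's aside (illustrative only, not part of this patch): the hard-coded
// numTokens values in these tests stand in for counts produced by the
// @dqbd/tiktoken encoder that patch 1 wires into pushCompletionToHistory.
// A real count for a message is assumed to be derived roughly like this:
//
//   import { get_encoding } from "@dqbd/tiktoken";
//
//   const encoding = get_encoding("cl100k_base"); // gpt-4 / gpt-3.5 encoding
//   const numTokens = encoding.encode("Hello, my name is Bob.").length;
//
// The exact counts will differ from the hand-picked values used here; the
// filter only depends on how the per-message sums compare to maxTokens.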
+
+test("GIVEN chat history exceeds max token number WHEN applying filter AND there is a system role in chat history THEN it should return the filtered chat history", () => {
+  const maxTokens = 50;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "system",
+        content: "You are a helpful chatbot.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Hello, my name is Bob.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+
+  const expectedFilteredChatHistory = [
+    {
+      completion: {
+        role: "system",
+        content: "You are a helpful chatbot.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  expect(filteredChatHistory.length).toEqual(2);
+  expect(filteredChatHistory).toEqual(expectedFilteredChatHistory);
+});
+
+test("GIVEN chat history most recent message exceeds max tokens alone WHEN applying filter THEN it should return this message", () => {
+  const maxTokens = 30;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "user",
+        content:
+          "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ",
+      },
+      numTokens: 50,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+
+  expect(filteredChatHistory).toEqual(chatHistory);
+});
+
 afterEach(() => {
   jest.clearAllMocks();
 });

From 115db8bd1c4c4631a051e1b28085d823dbe11c71 Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 17:02:55 +0100
Subject: [PATCH 3/6] add max token sizes for each model

---
 backend/src/openai.ts | 52 +++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 34 deletions(-)

diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index a523b8aea..c72fe6f5b 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -86,6 +86,18 @@ const chatGptFunctions = [
   },
 ];
 
+// max tokens each model can use
+const chatModelMaxTokens = {
+  [CHAT_MODELS.GPT_4]: 8192,
+  [CHAT_MODELS.GPT_4_0613]: 8192,
+  [CHAT_MODELS.GPT_4_32K]: 32768,
+  [CHAT_MODELS.GPT_4_32K_0613]: 32768,
+  [CHAT_MODELS.GPT_3_5_TURBO]: 4097,
+  [CHAT_MODELS.GPT_3_5_TURBO_0613]: 4097,
+  [CHAT_MODELS.GPT_3_5_TURBO_16K]: 16385,
+  [CHAT_MODELS.GPT_3_5_TURBO_16K_0613]: 16385,
+};
+
 // test the api key works with the model
 async function validateApiKey(openAiApiKey: string, gptModel: string) {
   try {
@@ -289,32 +301,16 @@ async function chatGptChatCompletion(
       chatHistory.shift();
     }
   }
-
   const chat_completion = await openai.createChatCompletion({
     model: gptModel,
-    messages: getChatCompletionsFromHistory(chatHistory),
+    messages: getChatCompletionsFromHistory(chatHistory, gptModel),
     functions: chatGptFunctions,
   });
 
-  console.debug(
-    "chat completion. token info: ",
-    JSON.stringify(chat_completion.data.usage)
-  );
-
   // get the reply
   return chat_completion.data.choices[0].message ?? null;
 }
 
-function countChatHistoryTokens(chatHistory: ChatHistoryMessage[]) {
-  let sumTokens = 0;
-  chatHistory.forEach((message) => {
-    if (message.numTokens) {
-      sumTokens += message.numTokens;
-    }
-  });
-  return sumTokens;
-}
-
 // take only the chat history to send to GPT that is within the max tokens
 function filterChatHistoryByMaxTokens(
   list: ChatHistoryMessage[],
@@ -358,37 +354,25 @@ function filterChatHistoryByMaxTokens(
 
 // take only the completions to send to GPT
 function getChatCompletionsFromHistory(
-  chatHistory: ChatHistoryMessage[]
+  chatHistory: ChatHistoryMessage[],
+  gptModel: CHAT_MODELS
 ): ChatCompletionRequestMessage[] {
   // limit the number of tokens sent to GPT
-  const maxTokens = 500;
+  const maxTokens = chatModelMaxTokens[gptModel];
+  console.log("gpt model = ", gptModel, "max tokens = ", maxTokens);
+
   const reducedChatHistory: ChatHistoryMessage[] = filterChatHistoryByMaxTokens(
     chatHistory,
     maxTokens
   );
-  console.debug(
-    "number of tokens in chat history",
-    countChatHistoryTokens(chatHistory)
-  );
-
-  console.log("reduced chat history: ");
-  console.log(reducedChatHistory);
-
   const completions: ChatCompletionRequestMessage[] =
     reducedChatHistory.length > 0
       ? (reducedChatHistory
           .filter((message) => message.completion !== null)
           .map(
-            // we know the completion is not null here
            (message) => message.completion
          ) as ChatCompletionRequestMessage[])
      : [];
-
-  console.debug(
-    "number of tokens in reduced chat history",
-    countChatHistoryTokens(reducedChatHistory)
-  );
-
   return completions;
 }

From 38638896697d3bdb3c64d83835ba873651393d4d Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 17:14:07 +0100
Subject: [PATCH 4/6] fix selecting gpt model not updating

---
 backend/src/router.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backend/src/router.ts b/backend/src/router.ts
index fb3d697a1..e077d1281 100644
--- a/backend/src/router.ts
+++ b/backend/src/router.ts
@@ -339,6 +339,7 @@ router.post("/openai/model", async (req: OpenAiSetModelRequest, res) => {
   } else if (model === req.session.gptModel) {
     res.status(200).send();
   } else if (await setGptModel(req.session.openAiApiKey, model)) {
+    req.session.gptModel = model;
     res.status(200).send();
   } else {
     res.status(401).send();

From 3769daef3d77456b1fa45d834aea038a69c262ec Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 17:17:30 +0100
Subject: [PATCH 5/6] fix the button

---
 frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx b/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx
index b120342ab..3f24b3eb2 100644
--- a/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx
+++ b/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx
@@ -66,7 +66,7 @@ function ModelSelectionBox() {

From e46f4ef3eeba402f90ec05fa86c6100fb68afe42 Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Wed, 13 Sep 2023 11:05:55 +0100
Subject: [PATCH 6/6] rename max chat history variable

---
 backend/src/openai.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index c72fe6f5b..87b7ddc0d 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -382,14 +382,14 @@ function pushCompletionToHistory(
   messageType: CHAT_MESSAGE_TYPE
 ) {
   // limit the length of the chat history
-  const maxMessageLength = 1000;
+  const maxChatHistoryLength = 1000;
 
   // gpt-4 and 3.5 models use cl100k_base encoding
   const encoding = get_encoding("cl100k_base");
 
   if (messageType !== CHAT_MESSAGE_TYPE.BOT_BLOCKED) {
     // remove the oldest message, not including system role message
-    if (chatHistory.length >= maxMessageLength) {
+    if (chatHistory.length >= maxChatHistoryLength) {
       if (chatHistory[0].completion?.role !== "system") {
         chatHistory.shift();
       } else {
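Editor's note: the sketch below is not part of the patch series; it is a minimal illustration of how the exported filterChatHistoryByMaxTokens helper is expected to behave once the series is applied. It assumes the ChatHistoryMessage shape from backend/src/models/chat.ts, and the import paths are illustrative and would need adjusting to wherever the snippet lives relative to the backend sources.

import { filterChatHistoryByMaxTokens } from "../src/openai";
import { CHAT_MESSAGE_TYPE, ChatHistoryMessage } from "../src/models/chat";

// Walking the history from most recent to oldest, the helper always keeps the
// latest message, always keeps a leading system message, and then adds older
// messages only while the running numTokens total stays within the budget.
const history: ChatHistoryMessage[] = [
  {
    completion: { role: "system", content: "You are a helpful chatbot." },
    numTokens: 15,
    chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM,
  },
  {
    completion: { role: "user", content: "Hello, my name is Bob." },
    numTokens: 15,
    chatMessageType: CHAT_MESSAGE_TYPE.USER,
  },
  {
    completion: { role: "user", content: "Send an email to my boss." },
    numTokens: 30,
    chatMessageType: CHAT_MESSAGE_TYPE.USER,
  },
];

// With a 50-token budget, the system message (15 tokens) and the most recent
// user message (30 tokens) fit, but adding the middle message (15 more) would
// exceed the limit, so the middle message is dropped.
const trimmed = filterChatHistoryByMaxTokens(history, 50);
console.log(trimmed.map((message) => message.completion?.content));
// -> [ 'You are a helpful chatbot.', 'Send an email to my boss.' ]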