From a41a04d7c3b15d68ba9e1314e6843cb7f69aa1c4 Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 09:03:06 +0100
Subject: [PATCH 1/6] remove old messages from chat history when queue limit reached

---
 backend/src/openai.ts | 47 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index efa44fd06..94b2e3cd1 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -26,6 +26,7 @@ import {
   FunctionAskQuestionParams,
   FunctionSendEmailParams,
 } from "./models/openai";
+import { get_encoding } from "@dqbd/tiktoken";
 
 // OpenAI config
 let config: Configuration | null = null;
@@ -294,6 +295,11 @@ async function chatGptChatCompletion(
     functions: chatGptFunctions,
   });
 
+  console.debug(
+    "chat completion. token info: ",
+    JSON.stringify(chat_completion.data.usage)
+  );
+
   // get the reply
   return chat_completion.data.choices[0].message ?? null;
 }
@@ -302,8 +308,29 @@
 // take only the completions to send to GPT
 function getChatCompletionsFromHistory(
   chatHistory: ChatHistoryMessage[]
 ): ChatCompletionRequestMessage[] {
+  // limit the number of tokens sent to GPT
+  const tokenLimit = 100;
+  let currentTokens = 0;
+
+  // reverse chat history
+  const reducedChatHistory = chatHistory.reverse().filter((message) => {
+    const totalTokens = currentTokens + (message.numTokens ?? 0);
+    if (totalTokens <= tokenLimit) {
+      currentTokens = totalTokens;
+      console.debug("current tokens: ", currentTokens);
+      return true;
+    } else {
+      return false;
+    }
+  });
+
+  // reduce to only the completions
+  console.debug("Chat history: to reduce ", chatHistory.length);
+  console.debug("Reduced chat history: to show ", reducedChatHistory.length);
+  console.log(reducedChatHistory);
+
   const completions: ChatCompletionRequestMessage[] =
-    chatHistory.length > 0
+    reducedChatHistory.reverse().length > 0
       ? (chatHistory
           .filter((message) => message.completion !== null)
           .map(
             // we know the completion is not null here
             (message) => message.completion
           ) as ChatCompletionRequestMessage[])
       : [];
+
   return completions;
 }
@@ -319,10 +347,27 @@ function pushCompletionToHistory(
   completion: ChatCompletionRequestMessage,
   messageType: CHAT_MESSAGE_TYPE
 ) {
+  // limit the length of the chat history
+  const maxMessageLength = 1000;
+
+  // gpt-4 and 3.5 models use cl100k_base encoding
+  const encoding = get_encoding("cl100k_base");
+
   if (messageType !== CHAT_MESSAGE_TYPE.BOT_BLOCKED) {
+    // remove the oldest message, not including system role message
+    if (chatHistory.length >= maxMessageLength) {
+      if (chatHistory[0].completion?.role !== "system") {
+        chatHistory.shift();
+      } else {
+        chatHistory.splice(1, 1);
+      }
+    }
     chatHistory.push({
       completion: completion,
       chatMessageType: messageType,
+      numTokens: completion.content
+        ? encoding.encode(completion.content).length
+        : null,
     });
   } else {
     // do not add the bots reply which was subsequently blocked

From 65c327cac26ebaebdf400f2143fff43de8cb4eea Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 16:26:18 +0100
Subject: [PATCH 2/6] filter chat history based on max tokens

---
 backend/package-lock.json        |   6 +
 backend/package.json             |   1 +
 backend/src/models/chat.ts       |   1 +
 backend/src/openai.ts            |  96 ++++++++++++----
 backend/test/unit/openai.test.ts | 187 ++++++++++++++++++++++++++++++-
 5 files changed, 269 insertions(+), 22 deletions(-)

diff --git a/backend/package-lock.json b/backend/package-lock.json
index 7544869bd..90ed72a6e 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -5,6 +5,7 @@
   "packages": {
     "": {
       "dependencies": {
+        "@dqbd/tiktoken": "^1.0.7",
         "cors": "^2.8.5",
         "d3-dsv": "^2.0.0",
         "dotenv": "^16.3.1",
@@ -809,6 +810,11 @@
         "@jridgewell/sourcemap-codec": "^1.4.10"
       }
     },
+    "node_modules/@dqbd/tiktoken": {
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.7.tgz",
+      "integrity": "sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw=="
+    },
     "node_modules/@eslint-community/eslint-utils": {
       "version": "4.4.0",
       "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
diff --git a/backend/package.json b/backend/package.json
index e5b072bd6..7fa4e5b30 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -1,5 +1,6 @@
 {
   "dependencies": {
+    "@dqbd/tiktoken": "^1.0.7",
     "cors": "^2.8.5",
     "d3-dsv": "^2.0.0",
     "dotenv": "^16.3.1",
diff --git a/backend/src/models/chat.ts b/backend/src/models/chat.ts
index 0ce3cd045..66cce5fdb 100644
--- a/backend/src/models/chat.ts
+++ b/backend/src/models/chat.ts
@@ -59,6 +59,7 @@ interface ChatHttpResponse {
 interface ChatHistoryMessage {
   completion: ChatCompletionRequestMessage | null;
   chatMessageType: CHAT_MESSAGE_TYPE;
+  numTokens?: number | null;
   infoMessage?: string | null;
 }
diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index 94b2e3cd1..a523b8aea 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -53,6 +53,7 @@ const chatGptFunctions = [
       },
       confirmed: {
         type: "boolean",
+        default: "false",
         description:
           "whether the user has confirmed the email is correct before sending",
       },
@@ -304,34 +305,78 @@ async function chatGptChatCompletion(
   return chat_completion.data.choices[0].message ?? null;
 }
 
+function countChatHistoryTokens(chatHistory: ChatHistoryMessage[]) {
+  let sumTokens = 0;
+  chatHistory.forEach((message) => {
+    if (message.numTokens) {
+      sumTokens += message.numTokens;
+    }
+  });
+  return sumTokens;
+}
+
+// take only the chat history to send to GPT that is within the max tokens
+function filterChatHistoryByMaxTokens(
+  list: ChatHistoryMessage[],
+  maxNumTokens: number
+): ChatHistoryMessage[] {
+  let sumTokens = 0;
+  const filteredList: ChatHistoryMessage[] = [];
+
+  // reverse list to add from most recent
+  const reverseList = list.slice().reverse();
+
+  // always add the most recent message to start of list
+  filteredList.push(reverseList[0]);
+  sumTokens += reverseList[0].numTokens ?? 0;
+
+  // if the first message is a system role add it to list
+  if (list[0].completion?.role === "system") {
+    sumTokens += list[0].numTokens ?? 0;
+    filteredList.push(list[0]);
+  }
+
+  // add elements after first message until max tokens reached
+  for (let i = 1; i < reverseList.length; i++) {
+    const element = reverseList[i];
+    if (element.completion && element.numTokens) {
+      // if we reach end and system role is there skip as it's already been added
+      if (element.completion.role === "system") {
+        continue;
+      }
+      if (sumTokens + element.numTokens <= maxNumTokens) {
+        filteredList.splice(i, 0, element);
+        sumTokens += element.numTokens;
+      } else {
+        console.debug("max tokens reached on element = ", element);
+        break;
+      }
+    }
+  }
+  return filteredList.reverse();
+}
+
 // take only the completions to send to GPT
 function getChatCompletionsFromHistory(
   chatHistory: ChatHistoryMessage[]
 ): ChatCompletionRequestMessage[] {
   // limit the number of tokens sent to GPT
-  const tokenLimit = 100;
-  let currentTokens = 0;
-
-  // reverse chat history
-  const reducedChatHistory = chatHistory.reverse().filter((message) => {
-    const totalTokens = currentTokens + (message.numTokens ?? 0);
-    if (totalTokens <= tokenLimit) {
-      currentTokens = totalTokens;
-      console.debug("current tokens: ", currentTokens);
-      return true;
-    } else {
-      return false;
-    }
-  });
+  const maxTokens = 500;
+  const reducedChatHistory: ChatHistoryMessage[] = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  console.debug(
+    "number of tokens in chat history",
+    countChatHistoryTokens(chatHistory)
+  );
 
-  // reduce to only the completions
-  console.debug("Chat history: to reduce ", chatHistory.length);
-  console.debug("Reduced chat history: to show ", reducedChatHistory.length);
+  console.log("reduced chat history: ");
   console.log(reducedChatHistory);
 
   const completions: ChatCompletionRequestMessage[] =
-    reducedChatHistory.reverse().length > 0
-      ? (chatHistory
+    reducedChatHistory.length > 0
+      ? (reducedChatHistory
           .filter((message) => message.completion !== null)
           .map(
             // we know the completion is not null here
             (message) => message.completion
           ) as ChatCompletionRequestMessage[])
       : [];
 
+  console.debug(
+    "number of tokens in reduced chat history",
+    countChatHistoryTokens(reducedChatHistory)
+  );
+
   return completions;
 }
@@ -504,4 +554,10 @@ async function chatGptSendMessage(
   }
 }
 
-export { chatGptSendMessage, setOpenAiApiKey, validateApiKey, setGptModel };
+export {
+  chatGptSendMessage,
+  filterChatHistoryByMaxTokens,
+  setOpenAiApiKey,
+  validateApiKey,
+  setGptModel,
+};
diff --git a/backend/test/unit/openai.test.ts b/backend/test/unit/openai.test.ts
index e02709b5d..a71539736 100644
--- a/backend/test/unit/openai.test.ts
+++ b/backend/test/unit/openai.test.ts
@@ -1,7 +1,15 @@
 import { OpenAIApi } from "openai";
-import { validateApiKey, setOpenAiApiKey } from "../../src/openai";
+import {
+  validateApiKey,
+  setOpenAiApiKey,
+  filterChatHistoryByMaxTokens,
+} from "../../src/openai";
 import { initQAModel } from "../../src/langchain";
-import { CHAT_MODELS } from "../../src/models/chat";
+import {
+  CHAT_MESSAGE_TYPE,
+  CHAT_MODELS,
+  ChatHistoryMessage,
+} from "../../src/models/chat";
 
 // Define a mock implementation for the createChatCompletion method
 const mockCreateChatCompletion = jest.fn();
@@ -72,6 +80,181 @@ test("GIVEN an invalid API key WHEN calling setOpenAiApiKey THEN it should set t
   expect(initQAModel).not.toHaveBeenCalled();
 });
 
+test("GIVEN chat history exceeds max token number WHEN applying filter THEN it should return the filtered chat history", () => {
+  const maxTokens = 50;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "user",
+        content: "Hello, my name is Bob.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+  // expect that the first message is discounted
+  const expectedFilteredChatHistory = [
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  expect(filteredChatHistory).toEqual(expectedFilteredChatHistory);
+});
+
+test("GIVEN chat history does not exceed max token number WHEN applying filter THEN it should return the original chat history", () => {
+  const maxTokens = 1000;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "user",
+        content: "Hello, my name is Bob.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  expect(filteredChatHistory).toEqual(chatHistory);
+});
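// Editor's aside (illustrative only, not part of this patch): the hard-coded
// numTokens values in these tests stand in for counts produced by the
// @dqbd/tiktoken encoder that patch 1 wires into pushCompletionToHistory.
// A real count for a message is assumed to be derived roughly like this:
//
//   import { get_encoding } from "@dqbd/tiktoken";
//
//   const encoding = get_encoding("cl100k_base"); // gpt-4 / gpt-3.5 encoding
//   const numTokens = encoding.encode("Hello, my name is Bob.").length;
//
// The exact counts will differ from the hand-picked values used here; the
// filter only depends on how the per-message sums compare to maxTokens.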
+
+test("GIVEN chat history exceeds max token number WHEN applying filter AND there is a system role in chat history THEN it should return the filtered chat history", () => {
+  const maxTokens = 50;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "system",
+        content: "You are a helpful chatbot.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Hello, my name is Bob.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+    {
+      completion: {
+        role: "assistant",
+        content: "Hello, how are you?",
+      },
+      numTokens: 17,
+      chatMessageType: CHAT_MESSAGE_TYPE.BOT,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+
+  const expectedFilteredChatHistory = [
+    {
+      completion: {
+        role: "system",
+        content: "You are a helpful chatbot.",
+      },
+      numTokens: 15,
+      chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM,
+    },
+    {
+      completion: {
+        role: "user",
+        content: "Send an email to my boss to tell him I quit.",
+      },
+      numTokens: 30,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+  expect(filteredChatHistory.length).toEqual(2);
+  expect(filteredChatHistory).toEqual(expectedFilteredChatHistory);
+});
+
+test("GIVEN chat history most recent message exceeds max tokens alone WHEN applying filter THEN it should return this message", () => {
+  const maxTokens = 30;
+  const chatHistory: ChatHistoryMessage[] = [
+    {
+      completion: {
+        role: "user",
+        content:
+          "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ",
+      },
+      numTokens: 50,
+      chatMessageType: CHAT_MESSAGE_TYPE.USER,
+    },
+  ];
+  const filteredChatHistory = filterChatHistoryByMaxTokens(
+    chatHistory,
+    maxTokens
+  );
+
+  expect(filteredChatHistory).toEqual(chatHistory);
+});
+
 afterEach(() => {
   jest.clearAllMocks();
 });

From 115db8bd1c4c4631a051e1b28085d823dbe11c71 Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 17:02:55 +0100
Subject: [PATCH 3/6] add max token sizes for each model

---
 backend/src/openai.ts | 52 +++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 34 deletions(-)

diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index a523b8aea..c72fe6f5b 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -86,6 +86,18 @@ const chatGptFunctions = [
   },
 ];
 
+// max tokens each model can use
+const chatModelMaxTokens = {
+  [CHAT_MODELS.GPT_4]: 8192,
+  [CHAT_MODELS.GPT_4_0613]: 8192,
+  [CHAT_MODELS.GPT_4_32K]: 32768,
+  [CHAT_MODELS.GPT_4_32K_0613]: 32768,
+  [CHAT_MODELS.GPT_3_5_TURBO]: 4097,
+  [CHAT_MODELS.GPT_3_5_TURBO_0613]: 4097,
+  [CHAT_MODELS.GPT_3_5_TURBO_16K]: 16385,
+  [CHAT_MODELS.GPT_3_5_TURBO_16K_0613]: 16385,
+};
+
 // test the api key works with the model
 async function validateApiKey(openAiApiKey: string, gptModel: string) {
   try {
@@ -289,32 +301,16 @@ async function chatGptChatCompletion(
       chatHistory.shift();
     }
   }
-
   const chat_completion = await openai.createChatCompletion({
     model: gptModel,
-    messages: getChatCompletionsFromHistory(chatHistory),
+    messages: getChatCompletionsFromHistory(chatHistory, gptModel),
     functions: chatGptFunctions,
   });
 
-  console.debug(
-    "chat completion. token info: ",
-    JSON.stringify(chat_completion.data.usage)
-  );
-
   // get the reply
   return chat_completion.data.choices[0].message ?? null;
 }
 
-function countChatHistoryTokens(chatHistory: ChatHistoryMessage[]) {
-  let sumTokens = 0;
-  chatHistory.forEach((message) => {
-    if (message.numTokens) {
-      sumTokens += message.numTokens;
-    }
-  });
-  return sumTokens;
-}
-
 // take only the chat history to send to GPT that is within the max tokens
 function filterChatHistoryByMaxTokens(
   list: ChatHistoryMessage[],
@@ -358,37 +354,25 @@ function filterChatHistoryByMaxTokens(
 
 // take only the completions to send to GPT
 function getChatCompletionsFromHistory(
-  chatHistory: ChatHistoryMessage[]
+  chatHistory: ChatHistoryMessage[],
+  gptModel: CHAT_MODELS
 ): ChatCompletionRequestMessage[] {
   // limit the number of tokens sent to GPT
-  const maxTokens = 500;
+  const maxTokens = chatModelMaxTokens[gptModel];
+  console.log("gpt model = ", gptModel, "max tokens = ", maxTokens);
+
   const reducedChatHistory: ChatHistoryMessage[] = filterChatHistoryByMaxTokens(
     chatHistory,
     maxTokens
   );
-  console.debug(
-    "number of tokens in chat history",
-    countChatHistoryTokens(chatHistory)
-  );
-
-  console.log("reduced chat history: ");
-  console.log(reducedChatHistory);
-
   const completions: ChatCompletionRequestMessage[] =
     reducedChatHistory.length > 0
       ? (reducedChatHistory
           .filter((message) => message.completion !== null)
           .map(
-            // we know the completion is not null here
            (message) => message.completion
          ) as ChatCompletionRequestMessage[])
      : [];
-
-  console.debug(
-    "number of tokens in reduced chat history",
-    countChatHistoryTokens(reducedChatHistory)
-  );
-
   return completions;
 }

From 38638896697d3bdb3c64d83835ba873651393d4d Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 17:14:07 +0100
Subject: [PATCH 4/6] fix selecting gpt model not updating

---
 backend/src/router.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backend/src/router.ts b/backend/src/router.ts
index fb3d697a1..e077d1281 100644
--- a/backend/src/router.ts
+++ b/backend/src/router.ts
@@ -339,6 +339,7 @@ router.post("/openai/model", async (req: OpenAiSetModelRequest, res) => {
   } else if (model === req.session.gptModel) {
     res.status(200).send();
   } else if (await setGptModel(req.session.openAiApiKey, model)) {
+    req.session.gptModel = model;
     res.status(200).send();
   } else {
     res.status(401).send();

From 3769daef3d77456b1fa45d834aea038a69c262ec Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Tue, 12 Sep 2023 17:17:30 +0100
Subject: [PATCH 5/6] fix the button

---
 frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx b/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx
index b120342ab..3f24b3eb2 100644
--- a/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx
+++ b/frontend/src/components/ModelSelectionBox/ModelSelectionBox.tsx
@@ -66,7 +66,7 @@ function ModelSelectionBox() {

From e46f4ef3eeba402f90ec05fa86c6100fb68afe42 Mon Sep 17 00:00:00 2001
From: "Heather Logan (She/Her)"
Date: Wed, 13 Sep 2023 11:05:55 +0100
Subject: [PATCH 6/6] rename max chat history variable

---
 backend/src/openai.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/src/openai.ts b/backend/src/openai.ts
index c72fe6f5b..87b7ddc0d 100644
--- a/backend/src/openai.ts
+++ b/backend/src/openai.ts
@@ -382,14 +382,14 @@ function pushCompletionToHistory(
   messageType: CHAT_MESSAGE_TYPE
 ) {
   // limit the length of the chat history
-  const maxMessageLength = 1000;
+  const maxChatHistoryLength = 1000;
 
   // gpt-4 and 3.5 models use cl100k_base encoding
   const encoding = get_encoding("cl100k_base");
 
   if (messageType !== CHAT_MESSAGE_TYPE.BOT_BLOCKED) {
     // remove the oldest message, not including system role message
-    if (chatHistory.length >= maxMessageLength) {
+    if (chatHistory.length >= maxChatHistoryLength) {
       if (chatHistory[0].completion?.role !== "system") {
         chatHistory.shift();
       } else {
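Editor's note: the sketch below is not part of the patch series; it is a minimal illustration of how the exported filterChatHistoryByMaxTokens helper is expected to behave once the series is applied. It assumes the ChatHistoryMessage shape from backend/src/models/chat.ts, and the import paths are illustrative and would need adjusting to wherever the snippet lives relative to the backend sources.

import { filterChatHistoryByMaxTokens } from "../src/openai";
import { CHAT_MESSAGE_TYPE, ChatHistoryMessage } from "../src/models/chat";

// Walking the history from most recent to oldest, the helper always keeps the
// latest message, always keeps a leading system message, and then adds older
// messages only while the running numTokens total stays within the budget.
const history: ChatHistoryMessage[] = [
  {
    completion: { role: "system", content: "You are a helpful chatbot." },
    numTokens: 15,
    chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM,
  },
  {
    completion: { role: "user", content: "Hello, my name is Bob." },
    numTokens: 15,
    chatMessageType: CHAT_MESSAGE_TYPE.USER,
  },
  {
    completion: { role: "user", content: "Send an email to my boss." },
    numTokens: 30,
    chatMessageType: CHAT_MESSAGE_TYPE.USER,
  },
];

// With a 50-token budget, the system message (15 tokens) and the most recent
// user message (30 tokens) fit, but adding the middle message (15 more) would
// exceed the limit, so the middle message is dropped.
const trimmed = filterChatHistoryByMaxTokens(history, 50);
console.log(trimmed.map((message) => message.completion?.content));
// -> [ 'You are a helpful chatbot.', 'Send an email to my boss.' ]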