From 1ad89d54b73bf1ece987e0e3599722e7e923d228 Mon Sep 17 00:00:00 2001 From: "Heather Logan (She/Her)" Date: Fri, 6 Oct 2023 11:04:35 +0100 Subject: [PATCH 1/3] update system role when configured --- backend/src/openai.ts | 10 +++++++++- backend/test/integration/openai.test.ts | 18 ++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/backend/src/openai.ts b/backend/src/openai.ts index af43be737..37b7758d8 100644 --- a/backend/src/openai.ts +++ b/backend/src/openai.ts @@ -282,16 +282,24 @@ async function chatGptChatCompletion( currentLevel !== LEVEL_NAMES.SANDBOX || isDefenceActive(DEFENCE_TYPES.SYSTEM_ROLE, defences) ) { + const systemRoleContent = getSystemRole(defences, currentLevel); + // check to see if there's already a system role if (!chatHistory.find((message) => message.completion?.role === "system")) { // add the system role to the start of the chat history chatHistory.unshift({ completion: { role: "system", - content: getSystemRole(defences, currentLevel), + content: systemRoleContent, }, chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM, }); + } else { + // replace with the latest system role + chatHistory[0].completion = { + role: "system", + content: systemRoleContent, + }; } } else { // remove the system role from the chat history diff --git a/backend/test/integration/openai.test.ts b/backend/test/integration/openai.test.ts index 5041268ff..a25347018 100644 --- a/backend/test/integration/openai.test.ts +++ b/backend/test/integration/openai.test.ts @@ -7,7 +7,11 @@ import { import { chatGptSendMessage } from "../../src/openai"; import { DEFENCE_TYPES, DefenceInfo } from "../../src/models/defence"; import { EmailInfo } from "../../src/models/email"; -import { activateDefence, getInitialDefences } from "../../src/defence"; +import { + activateDefence, + configureDefence, + getInitialDefences, +} from "../../src/defence"; import { systemRoleDefault } from "../../src/promptTemplates"; // Define a mock implementation for the createChatCompletion method @@ -323,8 +327,8 @@ test("GIVEN SYSTEM_ROLE defence is inactive WHEN sending message THEN system rol }); test( - "GIVEN SYSTEM_ROLE defence is active AND the system role is already in the chat history " + - "WHEN sending message THEN system role is not re-added to the chat history", + "GIVEN SYSTEM_ROLE defence is configured AND the system role is already in the chat history " + + "WHEN sending message THEN system role is replaced with default value in the chat history", async () => { const message = "Hello"; const chatHistory: ChatHistoryMessage[] = [ @@ -365,6 +369,12 @@ test( const openAiApiKey = "sk-12345"; defences = activateDefence(DEFENCE_TYPES.SYSTEM_ROLE, defences); + defences = configureDefence(DEFENCE_TYPES.SYSTEM_ROLE, defences, [ + { + id: "systemRole", + value: "You are not a helpful assistant", + }, + ]); // Mock the createChatCompletion function mockCreateChatCompletion.mockResolvedValueOnce({ @@ -398,7 +408,7 @@ test( // system role is added to the start of the chat history expect(chatHistory[0].completion?.role).toBe("system"); expect(chatHistory[0].completion?.content).toBe( - "You are a helpful assistant" + "You are not a helpful assistant" ); // rest of the chat history is in order expect(chatHistory[1].completion?.role).toBe("user"); From f47ba103bad6fde0a03f1e1f17e656703acec912 Mon Sep 17 00:00:00 2001 From: Chris Wilton-Magras Date: Fri, 13 Oct 2023 16:47:37 +0100 Subject: [PATCH 2/3] Tidier system role find-or-add --- backend/src/openai.ts | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/backend/src/openai.ts b/backend/src/openai.ts index 04e9a3d23..6dacab982 100644 --- a/backend/src/openai.ts +++ b/backend/src/openai.ts @@ -141,8 +141,7 @@ function getOpenAiFromKey(openAiApiKey: string) { config = new Configuration({ apiKey: openAiApiKey, }); - const openai = new OpenAIApi(config); - return openai; + return new OpenAIApi(config); } async function setGptModel(openAiApiKey: string, model: CHAT_MODELS) { @@ -156,9 +155,8 @@ async function setGptModel(openAiApiKey: string, model: CHAT_MODELS) { } } -// returns true if the function is in the list of functions available to ChatGPT function isChatGptFunction(functionName: string) { - return chatGptFunctions.find((func) => func.name === functionName); + return chatGptFunctions.some((func) => func.name === functionName); } async function chatGptCallFunction( @@ -282,24 +280,24 @@ async function chatGptChatCompletion( currentLevel !== LEVEL_NAMES.SANDBOX || isDefenceActive(DEFENCE_TYPES.SYSTEM_ROLE, defences) ) { - const systemRoleContent = getSystemRole(defences, currentLevel); + const completionConfig: ChatCompletionRequestMessage = { + role: "system", + content: getSystemRole(defences, currentLevel), + }; // check to see if there's already a system role - if (!chatHistory.find((message) => message.completion?.role === "system")) { + const systemRole = chatHistory.find( + (message) => message.completion?.role === "system" + ); + if (!systemRole) { // add the system role to the start of the chat history chatHistory.unshift({ - completion: { - role: "system", - content: systemRoleContent, - }, + completion: completionConfig, chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM, }); } else { // replace with the latest system role - chatHistory[0].completion = { - role: "system", - content: systemRoleContent, - }; + systemRole.completion = completionConfig; } } else { // remove the system role from the chat history From c44c92e0356f4358d33d9e48a1ad3323fce9b9b6 Mon Sep 17 00:00:00 2001 From: Chris Wilton-Magras Date: Fri, 13 Oct 2023 16:47:57 +0100 Subject: [PATCH 3/3] Extract duplicated test fixtures --- backend/test/integration/openai.test.ts | 1256 ++++++++++------------- 1 file changed, 558 insertions(+), 698 deletions(-) diff --git a/backend/test/integration/openai.test.ts b/backend/test/integration/openai.test.ts index a25347018..b5388e310 100644 --- a/backend/test/integration/openai.test.ts +++ b/backend/test/integration/openai.test.ts @@ -40,321 +40,49 @@ jest.mock("../../src/langchain", () => { }; }); -beforeEach(() => { - // clear environment variables - process.env = {}; -}); - -test("GIVEN OpenAI initialised WHEN sending message THEN reply is returned", async () => { - const message = "Hello"; - const chatHistory: ChatHistoryMessage[] = []; - const defences: DefenceInfo[] = []; - const sentEmails: EmailInfo[] = []; - const chatModel: ChatModel = { - id: CHAT_MODELS.GPT_4, - configuration: { - temperature: 1, - topP: 1, - frequencyPenalty: 0, - presencePenalty: 0, - }, - }; - const openAiApiKey = "sk-12345"; - - // Mock the createChatCompletion function - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Hi", - }, - }, - ], - }, - }); - - // send the message - const reply = await chatGptSendMessage( - chatHistory, - defences, - chatModel, - message, - true, - openAiApiKey, - sentEmails - ); - - expect(reply).toBeDefined(); - expect(reply?.completion).toBeDefined(); - expect(reply?.completion.content).toBe("Hi"); - // check the chat history has been updated - expect(chatHistory.length).toBe(2); - expect(chatHistory[0].completion?.role).toBe("user"); - expect(chatHistory[0].completion?.content).toBe("Hello"); - expect(chatHistory[1].completion?.role).toBe("assistant"); - expect(chatHistory[1].completion?.content).toBe("Hi"); - - // restore the mock - mockCreateChatCompletion.mockRestore(); -}); - -test("GIVEN SYSTEM_ROLE defence is active WHEN sending message THEN system role is added to chat history", async () => { - // set the system role prompt - - const message = "Hello"; - const chatHistory: ChatHistoryMessage[] = []; - let defences: DefenceInfo[] = getInitialDefences(); - const sentEmails: EmailInfo[] = []; - const chatModel: ChatModel = { - id: CHAT_MODELS.GPT_4, - configuration: { - temperature: 1, - topP: 1, - frequencyPenalty: 0, - presencePenalty: 0, - }, - }; - const openAiApiKey = "sk-12345"; - - defences = activateDefence(DEFENCE_TYPES.SYSTEM_ROLE, defences); - - // Mock the createChatCompletion function - mockCreateChatCompletion.mockResolvedValueOnce({ +function chatResponseAssistant(content: string) { + return { data: { choices: [ { message: { role: "assistant", - content: "Hi", + content, }, }, ], }, - }); - - // send the message - const reply = await chatGptSendMessage( - chatHistory, - defences, - chatModel, - message, - true, - openAiApiKey, - sentEmails - ); - - expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Hi"); - // check the chat history has been updated - expect(chatHistory.length).toBe(3); - // system role is added to the start of the chat history - expect(chatHistory[0].completion?.role).toBe("system"); - expect(chatHistory[0].completion?.content).toBe(systemRoleDefault); - expect(chatHistory[1].completion?.role).toBe("user"); - expect(chatHistory[1].completion?.content).toBe("Hello"); - expect(chatHistory[2].completion?.role).toBe("assistant"); - expect(chatHistory[2].completion?.content).toBe("Hi"); - - // restore the mock - mockCreateChatCompletion.mockRestore(); -}); - -test("GIVEN SYSTEM_ROLE defence is active WHEN sending message THEN system role is added to the start of the chat history", async () => { - const message = "Hello"; - const isOriginalMessage = true; - const chatHistory: ChatHistoryMessage[] = [ - { - completion: { - role: "user", - content: "I'm a user", - }, - chatMessageType: CHAT_MESSAGE_TYPE.USER, - }, - { - completion: { - role: "assistant", - content: "I'm an assistant", - }, - chatMessageType: CHAT_MESSAGE_TYPE.BOT, - }, - ]; - let defences: DefenceInfo[] = getInitialDefences(); - const sentEmails: EmailInfo[] = []; - const chatModel: ChatModel = { - id: CHAT_MODELS.GPT_4, - configuration: { - temperature: 1, - topP: 1, - frequencyPenalty: 0, - presencePenalty: 0, - }, }; +} - const openAiApiKey = "sk-12345"; - - // activate the SYSTEM_ROLE defence - defences = activateDefence(DEFENCE_TYPES.SYSTEM_ROLE, defences); - - // Mock the createChatCompletion function - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Hi", +const chatResponseAssistantEmailConfirm = { + data: { + choices: [ + { + message: { + role: "assistant", + content: null, + function_call: { + name: "sendEmail", + arguments: + '{\n "address": "bob@example.com",\n "subject": "Hi",\n "body": "Hello", "confirmed": "true" \n}', }, }, - ], - }, - }); - - // send the message - const reply = await chatGptSendMessage( - chatHistory, - defences, - chatModel, - message, - isOriginalMessage, - openAiApiKey, - sentEmails - ); - - expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Hi"); - // check the chat history has been updated - expect(chatHistory.length).toBe(5); - // system role is added to the start of the chat history - expect(chatHistory[0].completion?.role).toBe("system"); - expect(chatHistory[0].completion?.content).toBe(systemRoleDefault); - // rest of the chat history is in order - expect(chatHistory[1].completion?.role).toBe("user"); - expect(chatHistory[1].completion?.content).toBe("I'm a user"); - expect(chatHistory[2].completion?.role).toBe("assistant"); - expect(chatHistory[2].completion?.content).toBe("I'm an assistant"); - expect(chatHistory[3].completion?.role).toBe("user"); - expect(chatHistory[3].completion?.content).toBe("Hello"); - expect(chatHistory[4].completion?.role).toBe("assistant"); - expect(chatHistory[4].completion?.content).toBe("Hi"); - - // restore the mock - mockCreateChatCompletion.mockRestore(); -}); - -test("GIVEN SYSTEM_ROLE defence is inactive WHEN sending message THEN system role is removed from the chat history", async () => { - const message = "Hello"; - const chatHistory: ChatHistoryMessage[] = [ - { - completion: { - role: "system", - content: "You are a helpful assistant", - }, - chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM, - }, - { - completion: { - role: "user", - content: "I'm a user", - }, - chatMessageType: CHAT_MESSAGE_TYPE.USER, - }, - { - completion: { - role: "assistant", - content: "I'm an assistant", }, - chatMessageType: CHAT_MESSAGE_TYPE.BOT, - }, - ]; - const defences: DefenceInfo[] = getInitialDefences(); - const sentEmails: EmailInfo[] = []; - const chatModel: ChatModel = { - id: CHAT_MODELS.GPT_4, - configuration: { - temperature: 1, - topP: 1, - frequencyPenalty: 0, - presencePenalty: 0, - }, - }; - - const openAiApiKey = "sk-12345"; - - // Mock the createChatCompletion function - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Hi", - }, - }, - ], - }, + ], + }, +}; + +describe("OpenAI Integration Tests", () => { + beforeEach(() => { + // clear environment variables + process.env = {}; }); - // send the message - const reply = await chatGptSendMessage( - chatHistory, - defences, - chatModel, - message, - true, - openAiApiKey, - sentEmails - ); - - expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Hi"); - // check the chat history has been updated - expect(chatHistory.length).toBe(4); - // system role is removed from the start of the chat history - // rest of the chat history is in order - expect(chatHistory[0].completion?.role).toBe("user"); - expect(chatHistory[0].completion?.content).toBe("I'm a user"); - expect(chatHistory[1].completion?.role).toBe("assistant"); - expect(chatHistory[1].completion?.content).toBe("I'm an assistant"); - expect(chatHistory[2].completion?.role).toBe("user"); - expect(chatHistory[2].completion?.content).toBe("Hello"); - expect(chatHistory[3].completion?.role).toBe("assistant"); - expect(chatHistory[3].completion?.content).toBe("Hi"); - - // restore the mock - mockCreateChatCompletion.mockRestore(); -}); - -test( - "GIVEN SYSTEM_ROLE defence is configured AND the system role is already in the chat history " + - "WHEN sending message THEN system role is replaced with default value in the chat history", - async () => { + test("GIVEN OpenAI initialised WHEN sending message THEN reply is returned", async () => { const message = "Hello"; - const chatHistory: ChatHistoryMessage[] = [ - { - completion: { - role: "system", - content: "You are a helpful assistant", - }, - chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM, - }, - { - completion: { - role: "user", - content: "I'm a user", - }, - chatMessageType: CHAT_MESSAGE_TYPE.USER, - }, - { - completion: { - role: "assistant", - content: "I'm an assistant", - }, - chatMessageType: CHAT_MESSAGE_TYPE.BOT, - }, - ]; - let defences: DefenceInfo[] = getInitialDefences(); + const chatHistory: ChatHistoryMessage[] = []; + const defences: DefenceInfo[] = []; const sentEmails: EmailInfo[] = []; const chatModel: ChatModel = { id: CHAT_MODELS.GPT_4, @@ -365,30 +93,10 @@ test( presencePenalty: 0, }, }; - const openAiApiKey = "sk-12345"; - defences = activateDefence(DEFENCE_TYPES.SYSTEM_ROLE, defences); - defences = configureDefence(DEFENCE_TYPES.SYSTEM_ROLE, defences, [ - { - id: "systemRole", - value: "You are not a helpful assistant", - }, - ]); - // Mock the createChatCompletion function - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Hi", - }, - }, - ], - }, - }); + mockCreateChatCompletion.mockResolvedValueOnce(chatResponseAssistant("Hi")); // send the message const reply = await chatGptSendMessage( @@ -402,40 +110,24 @@ test( ); expect(reply).toBeDefined(); + expect(reply?.completion).toBeDefined(); expect(reply?.completion.content).toBe("Hi"); // check the chat history has been updated - expect(chatHistory.length).toBe(5); - // system role is added to the start of the chat history - expect(chatHistory[0].completion?.role).toBe("system"); - expect(chatHistory[0].completion?.content).toBe( - "You are not a helpful assistant" - ); - // rest of the chat history is in order - expect(chatHistory[1].completion?.role).toBe("user"); - expect(chatHistory[1].completion?.content).toBe("I'm a user"); - expect(chatHistory[2].completion?.role).toBe("assistant"); - expect(chatHistory[2].completion?.content).toBe("I'm an assistant"); - expect(chatHistory[3].completion?.role).toBe("user"); - expect(chatHistory[3].completion?.content).toBe("Hello"); - expect(chatHistory[4].completion?.role).toBe("assistant"); - expect(chatHistory[4].completion?.content).toBe("Hi"); + expect(chatHistory.length).toBe(2); + expect(chatHistory[0].completion?.role).toBe("user"); + expect(chatHistory[0].completion?.content).toBe("Hello"); + expect(chatHistory[1].completion?.role).toBe("assistant"); + expect(chatHistory[1].completion?.content).toBe("Hi"); // restore the mock mockCreateChatCompletion.mockRestore(); - } -); + }); -test( - "GIVEN the assistant sends an email AND EMAIL_WHITELIST is inactive AND email is not in the whitelist" + - "WHEN sending message " + - "THEN email is sent AND message is not blocked AND EMAIL_WHITELIST defence is alerted", - async () => { - // set email whitelist - process.env.EMAIL_WHITELIST = ""; + test("GIVEN SYSTEM_ROLE defence is active WHEN sending message THEN system role is added to chat history", async () => { + // set the system role prompt const message = "Hello"; const chatHistory: ChatHistoryMessage[] = []; - const defences: DefenceInfo[] = getInitialDefences(); const sentEmails: EmailInfo[] = []; const chatModel: ChatModel = { id: CHAT_MODELS.GPT_4, @@ -446,42 +138,15 @@ test( presencePenalty: 0, }, }; - const openAiApiKey = "sk-12345"; + const defences = activateDefence( + DEFENCE_TYPES.SYSTEM_ROLE, + getInitialDefences() + ); + // Mock the createChatCompletion function - mockCreateChatCompletion - // first time sendEmail is called - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: null, - function_call: { - name: "sendEmail", - arguments: - '{\n "address": "bob@example.com",\n "subject": "Hi",\n "body": "Hello", "confirmed": "true" \n}', - }, - }, - }, - ], - }, - }) - // second time assistant sends a message - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Email sent", - }, - }, - ], - }, - }); + mockCreateChatCompletion.mockResolvedValueOnce(chatResponseAssistant("Hi")); // send the message const reply = await chatGptSendMessage( @@ -495,36 +160,40 @@ test( ); expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Email sent"); - // check that the email has been sent - expect(sentEmails.length).toBe(1); - expect(sentEmails[0].address).toBe("bob@example.com"); - expect(sentEmails[0].subject).toBe("Hi"); - expect(sentEmails[0].content).toBe("Hello"); - // message is not blocked - expect(reply?.defenceInfo.isBlocked).toBe(false); - // EMAIL_WHITELIST defence is alerted - expect(reply?.defenceInfo.alertedDefences.length).toBe(1); - expect(reply?.defenceInfo.alertedDefences[0]).toBe( - DEFENCE_TYPES.EMAIL_WHITELIST - ); + expect(reply?.completion.content).toBe("Hi"); + // check the chat history has been updated + expect(chatHistory.length).toBe(3); + // system role is added to the start of the chat history + expect(chatHistory[0].completion?.role).toBe("system"); + expect(chatHistory[0].completion?.content).toBe(systemRoleDefault); + expect(chatHistory[1].completion?.role).toBe("user"); + expect(chatHistory[1].completion?.content).toBe("Hello"); + expect(chatHistory[2].completion?.role).toBe("assistant"); + expect(chatHistory[2].completion?.content).toBe("Hi"); // restore the mock mockCreateChatCompletion.mockRestore(); - } -); - -test( - "GIVEN the assistant sends an email AND EMAIL_WHITELIST is active AND email is not in the whitelist" + - "WHEN sending message " + - "THEN email is not sent AND message is blocked AND EMAIL_WHITELIST defence is triggered", - async () => { - // set email whitelist - process.env.EMAIL_WHITELIST = ""; + }); + test("GIVEN SYSTEM_ROLE defence is active WHEN sending message THEN system role is added to the start of the chat history", async () => { const message = "Hello"; - const chatHistory: ChatHistoryMessage[] = []; - let defences: DefenceInfo[] = getInitialDefences(); + const isOriginalMessage = true; + const chatHistory: ChatHistoryMessage[] = [ + { + completion: { + role: "user", + content: "I'm a user", + }, + chatMessageType: CHAT_MESSAGE_TYPE.USER, + }, + { + completion: { + role: "assistant", + content: "I'm an assistant", + }, + chatMessageType: CHAT_MESSAGE_TYPE.BOT, + }, + ]; const sentEmails: EmailInfo[] = []; const chatModel: ChatModel = { id: CHAT_MODELS.GPT_4, @@ -537,43 +206,15 @@ test( }; const openAiApiKey = "sk-12345"; - const isOriginalMessage = true; - defences = activateDefence(DEFENCE_TYPES.EMAIL_WHITELIST, defences); + // activate the SYSTEM_ROLE defence + const defences = activateDefence( + DEFENCE_TYPES.SYSTEM_ROLE, + getInitialDefences() + ); // Mock the createChatCompletion function - mockCreateChatCompletion - // first time sendEmail is called - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: null, - function_call: { - name: "sendEmail", - arguments: - '{\n "address": "bob@example.com",\n "subject": "Hi",\n "body": "Hello", "confirmed": "true" \n}', - }, - }, - }, - ], - }, - }) - // second time assistant sends a message - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Email not sent", - }, - }, - ], - }, - }); + mockCreateChatCompletion.mockResolvedValueOnce(chatResponseAssistant("Hi")); // send the message const reply = await chatGptSendMessage( @@ -587,33 +228,52 @@ test( ); expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Email not sent"); - // check that the email has not been sent - expect(sentEmails.length).toBe(0); - // message is blocked - expect(reply?.defenceInfo.isBlocked).toBe(true); - // EMAIL_WHITELIST defence is triggered - expect(reply?.defenceInfo.triggeredDefences.length).toBe(1); - expect(reply?.defenceInfo.triggeredDefences[0]).toBe( - DEFENCE_TYPES.EMAIL_WHITELIST - ); + expect(reply?.completion.content).toBe("Hi"); + // check the chat history has been updated + expect(chatHistory.length).toBe(5); + // system role is added to the start of the chat history + expect(chatHistory[0].completion?.role).toBe("system"); + expect(chatHistory[0].completion?.content).toBe(systemRoleDefault); + // rest of the chat history is in order + expect(chatHistory[1].completion?.role).toBe("user"); + expect(chatHistory[1].completion?.content).toBe("I'm a user"); + expect(chatHistory[2].completion?.role).toBe("assistant"); + expect(chatHistory[2].completion?.content).toBe("I'm an assistant"); + expect(chatHistory[3].completion?.role).toBe("user"); + expect(chatHistory[3].completion?.content).toBe("Hello"); + expect(chatHistory[4].completion?.role).toBe("assistant"); + expect(chatHistory[4].completion?.content).toBe("Hi"); // restore the mock mockCreateChatCompletion.mockRestore(); - } -); - -test( - "GIVEN the assistant sends an email AND EMAIL_WHITELIST is active AND email is in the whitelist" + - "WHEN sending message " + - "THEN email is sent AND message is not blocked AND EMAIL_WHITELIST defence is not triggered", - async () => { - // set email whitelist - process.env.EMAIL_WHITELIST = "bob@example.com"; - - const message = "Send an email to bob@example.com saying hi"; - const chatHistory: ChatHistoryMessage[] = []; - let defences: DefenceInfo[] = getInitialDefences(); + }); + + test("GIVEN SYSTEM_ROLE defence is inactive WHEN sending message THEN system role is removed from the chat history", async () => { + const message = "Hello"; + const chatHistory: ChatHistoryMessage[] = [ + { + completion: { + role: "system", + content: "You are a helpful assistant", + }, + chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM, + }, + { + completion: { + role: "user", + content: "I'm a user", + }, + chatMessageType: CHAT_MESSAGE_TYPE.USER, + }, + { + completion: { + role: "assistant", + content: "I'm an assistant", + }, + chatMessageType: CHAT_MESSAGE_TYPE.BOT, + }, + ]; + const defences: DefenceInfo[] = getInitialDefences(); const sentEmails: EmailInfo[] = []; const chatModel: ChatModel = { id: CHAT_MODELS.GPT_4, @@ -626,43 +286,9 @@ test( }; const openAiApiKey = "sk-12345"; - const isOriginalMessage = true; - - defences = activateDefence(DEFENCE_TYPES.EMAIL_WHITELIST, defences); // Mock the createChatCompletion function - mockCreateChatCompletion - // first time sendEmail is called - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: null, - function_call: { - name: "sendEmail", - arguments: - '{\n "address": "bob@example.com",\n "subject": "Hi",\n "body": "Hello", "confirmed": "true" \n}', - }, - }, - }, - ], - }, - }) - // second time assistant sends a message - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Email sent", - }, - }, - ], - }, - }); + mockCreateChatCompletion.mockResolvedValueOnce(chatResponseAssistant("Hi")); // send the message const reply = await chatGptSendMessage( @@ -670,39 +296,377 @@ test( defences, chatModel, message, - isOriginalMessage, + true, openAiApiKey, sentEmails ); expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Email sent"); - // check that the email has been sent - expect(sentEmails.length).toBe(1); - expect(sentEmails[0].address).toBe("bob@example.com"); - expect(sentEmails[0].subject).toBe("Hi"); - expect(sentEmails[0].content).toBe("Hello"); - // message is not blocked - expect(reply?.defenceInfo.isBlocked).toBe(false); - // EMAIL_WHITELIST defence is not triggered - expect(reply?.defenceInfo.triggeredDefences.length).toBe(0); + expect(reply?.completion.content).toBe("Hi"); + // check the chat history has been updated + expect(chatHistory.length).toBe(4); + // system role is removed from the start of the chat history + // rest of the chat history is in order + expect(chatHistory[0].completion?.role).toBe("user"); + expect(chatHistory[0].completion?.content).toBe("I'm a user"); + expect(chatHistory[1].completion?.role).toBe("assistant"); + expect(chatHistory[1].completion?.content).toBe("I'm an assistant"); + expect(chatHistory[2].completion?.role).toBe("user"); + expect(chatHistory[2].completion?.content).toBe("Hello"); + expect(chatHistory[3].completion?.role).toBe("assistant"); + expect(chatHistory[3].completion?.content).toBe("Hi"); // restore the mock mockCreateChatCompletion.mockRestore(); - } -); - -test( - "GIVEN the assistant sends an email AND EMAIL_WHITELIST is inactive AND email is in the whitelist" + - "WHEN sending message " + - "THEN email is sent AND message is not blocked AND EMAIL_WHITELIST defence is not triggered", - async () => { - // set email whitelist - process.env.EMAIL_WHITELIST = "bob@example.com"; - - const message = "Send an email to bob@example.com saying hi"; + }); + + test( + "GIVEN SYSTEM_ROLE defence is configured AND the system role is already in the chat history " + + "WHEN sending message THEN system role is replaced with default value in the chat history", + async () => { + const message = "Hello"; + const chatHistory: ChatHistoryMessage[] = [ + { + completion: { + role: "system", + content: "You are a helpful assistant", + }, + chatMessageType: CHAT_MESSAGE_TYPE.SYSTEM, + }, + { + completion: { + role: "user", + content: "I'm a user", + }, + chatMessageType: CHAT_MESSAGE_TYPE.USER, + }, + { + completion: { + role: "assistant", + content: "I'm an assistant", + }, + chatMessageType: CHAT_MESSAGE_TYPE.BOT, + }, + ]; + const sentEmails: EmailInfo[] = []; + const chatModel: ChatModel = { + id: CHAT_MODELS.GPT_4, + configuration: { + temperature: 1, + topP: 1, + frequencyPenalty: 0, + presencePenalty: 0, + }, + }; + + const openAiApiKey = "sk-12345"; + + const defences = configureDefence( + DEFENCE_TYPES.SYSTEM_ROLE, + activateDefence(DEFENCE_TYPES.SYSTEM_ROLE, getInitialDefences()), + [ + { + id: "systemRole", + value: "You are not a helpful assistant", + }, + ] + ); + + // Mock the createChatCompletion function + mockCreateChatCompletion.mockResolvedValueOnce( + chatResponseAssistant("Hi") + ); + + // send the message + const reply = await chatGptSendMessage( + chatHistory, + defences, + chatModel, + message, + true, + openAiApiKey, + sentEmails + ); + + expect(reply).toBeDefined(); + expect(reply?.completion.content).toBe("Hi"); + // check the chat history has been updated + expect(chatHistory.length).toBe(5); + // system role is added to the start of the chat history + expect(chatHistory[0].completion?.role).toBe("system"); + expect(chatHistory[0].completion?.content).toBe( + "You are not a helpful assistant" + ); + // rest of the chat history is in order + expect(chatHistory[1].completion?.role).toBe("user"); + expect(chatHistory[1].completion?.content).toBe("I'm a user"); + expect(chatHistory[2].completion?.role).toBe("assistant"); + expect(chatHistory[2].completion?.content).toBe("I'm an assistant"); + expect(chatHistory[3].completion?.role).toBe("user"); + expect(chatHistory[3].completion?.content).toBe("Hello"); + expect(chatHistory[4].completion?.role).toBe("assistant"); + expect(chatHistory[4].completion?.content).toBe("Hi"); + + // restore the mock + mockCreateChatCompletion.mockRestore(); + } + ); + + test( + "GIVEN the assistant sends an email AND EMAIL_WHITELIST is inactive AND email is not in the whitelist" + + "WHEN sending message " + + "THEN email is sent AND message is not blocked AND EMAIL_WHITELIST defence is alerted", + async () => { + // set email whitelist + process.env.EMAIL_WHITELIST = ""; + + const message = "Hello"; + const chatHistory: ChatHistoryMessage[] = []; + const defences: DefenceInfo[] = getInitialDefences(); + const sentEmails: EmailInfo[] = []; + const chatModel: ChatModel = { + id: CHAT_MODELS.GPT_4, + configuration: { + temperature: 1, + topP: 1, + frequencyPenalty: 0, + presencePenalty: 0, + }, + }; + + const openAiApiKey = "sk-12345"; + + // Mock the createChatCompletion function + mockCreateChatCompletion + // first time sendEmail is called + .mockResolvedValueOnce(chatResponseAssistantEmailConfirm) + // second time assistant sends a message + .mockResolvedValueOnce(chatResponseAssistant("Email sent")); + + // send the message + const reply = await chatGptSendMessage( + chatHistory, + defences, + chatModel, + message, + true, + openAiApiKey, + sentEmails + ); + + expect(reply).toBeDefined(); + expect(reply?.completion.content).toBe("Email sent"); + // check that the email has been sent + expect(sentEmails.length).toBe(1); + expect(sentEmails[0].address).toBe("bob@example.com"); + expect(sentEmails[0].subject).toBe("Hi"); + expect(sentEmails[0].content).toBe("Hello"); + // message is not blocked + expect(reply?.defenceInfo.isBlocked).toBe(false); + // EMAIL_WHITELIST defence is alerted + expect(reply?.defenceInfo.alertedDefences.length).toBe(1); + expect(reply?.defenceInfo.alertedDefences[0]).toBe( + DEFENCE_TYPES.EMAIL_WHITELIST + ); + + // restore the mock + mockCreateChatCompletion.mockRestore(); + } + ); + + test( + "GIVEN the assistant sends an email AND EMAIL_WHITELIST is active AND email is not in the whitelist" + + "WHEN sending message " + + "THEN email is not sent AND message is blocked AND EMAIL_WHITELIST defence is triggered", + async () => { + // set email whitelist + process.env.EMAIL_WHITELIST = ""; + + const message = "Hello"; + const chatHistory: ChatHistoryMessage[] = []; + const sentEmails: EmailInfo[] = []; + const chatModel: ChatModel = { + id: CHAT_MODELS.GPT_4, + configuration: { + temperature: 1, + topP: 1, + frequencyPenalty: 0, + presencePenalty: 0, + }, + }; + + const openAiApiKey = "sk-12345"; + const isOriginalMessage = true; + const defences = activateDefence( + DEFENCE_TYPES.EMAIL_WHITELIST, + getInitialDefences() + ); + + // Mock the createChatCompletion function + mockCreateChatCompletion + // first time sendEmail is called + .mockResolvedValueOnce(chatResponseAssistantEmailConfirm) + // second time assistant sends a message + .mockResolvedValueOnce(chatResponseAssistant("Email not sent")); + + // send the message + const reply = await chatGptSendMessage( + chatHistory, + defences, + chatModel, + message, + isOriginalMessage, + openAiApiKey, + sentEmails + ); + + expect(reply).toBeDefined(); + expect(reply?.completion.content).toBe("Email not sent"); + // check that the email has not been sent + expect(sentEmails.length).toBe(0); + // message is blocked + expect(reply?.defenceInfo.isBlocked).toBe(true); + // EMAIL_WHITELIST defence is triggered + expect(reply?.defenceInfo.triggeredDefences.length).toBe(1); + expect(reply?.defenceInfo.triggeredDefences[0]).toBe( + DEFENCE_TYPES.EMAIL_WHITELIST + ); + + // restore the mock + mockCreateChatCompletion.mockRestore(); + } + ); + + test( + "GIVEN the assistant sends an email AND EMAIL_WHITELIST is active AND email is in the whitelist" + + "WHEN sending message " + + "THEN email is sent AND message is not blocked AND EMAIL_WHITELIST defence is not triggered", + async () => { + // set email whitelist + process.env.EMAIL_WHITELIST = "bob@example.com"; + + const message = "Send an email to bob@example.com saying hi"; + const chatHistory: ChatHistoryMessage[] = []; + const sentEmails: EmailInfo[] = []; + const chatModel: ChatModel = { + id: CHAT_MODELS.GPT_4, + configuration: { + temperature: 1, + topP: 1, + frequencyPenalty: 0, + presencePenalty: 0, + }, + }; + + const openAiApiKey = "sk-12345"; + const isOriginalMessage = true; + const defences = activateDefence( + DEFENCE_TYPES.EMAIL_WHITELIST, + getInitialDefences() + ); + + // Mock the createChatCompletion function + mockCreateChatCompletion + // first time sendEmail is called + .mockResolvedValueOnce(chatResponseAssistantEmailConfirm) + // second time assistant sends a message + .mockResolvedValueOnce(chatResponseAssistant("Email sent")); + + // send the message + const reply = await chatGptSendMessage( + chatHistory, + defences, + chatModel, + message, + isOriginalMessage, + openAiApiKey, + sentEmails + ); + + expect(reply).toBeDefined(); + expect(reply?.completion.content).toBe("Email sent"); + // check that the email has been sent + expect(sentEmails.length).toBe(1); + expect(sentEmails[0].address).toBe("bob@example.com"); + expect(sentEmails[0].subject).toBe("Hi"); + expect(sentEmails[0].content).toBe("Hello"); + // message is not blocked + expect(reply?.defenceInfo.isBlocked).toBe(false); + // EMAIL_WHITELIST defence is not triggered + expect(reply?.defenceInfo.triggeredDefences.length).toBe(0); + + // restore the mock + mockCreateChatCompletion.mockRestore(); + } + ); + + test( + "GIVEN the assistant sends an email AND EMAIL_WHITELIST is inactive AND email is in the whitelist" + + "WHEN sending message " + + "THEN email is sent AND message is not blocked AND EMAIL_WHITELIST defence is not triggered", + async () => { + // set email whitelist + process.env.EMAIL_WHITELIST = "bob@example.com"; + + const message = "Send an email to bob@example.com saying hi"; + const chatHistory: ChatHistoryMessage[] = []; + const defences: DefenceInfo[] = getInitialDefences(); + const sentEmails: EmailInfo[] = []; + const chatModel: ChatModel = { + id: CHAT_MODELS.GPT_4, + configuration: { + temperature: 1, + topP: 1, + frequencyPenalty: 0, + presencePenalty: 0, + }, + }; + + const openAiApiKey = "sk-12345"; + const isOriginalMessage = true; + + // set email whitelist + process.env.EMAIL_WHITELIST = "bob@example.com"; + + // Mock the createChatCompletion function + mockCreateChatCompletion + // first time sendEmail is called + .mockResolvedValueOnce(chatResponseAssistantEmailConfirm) + // second time assistant sends a message + .mockResolvedValueOnce(chatResponseAssistant("Email sent")); + + // send the message + const reply = await chatGptSendMessage( + chatHistory, + defences, + chatModel, + message, + isOriginalMessage, + openAiApiKey, + sentEmails + ); + + expect(reply).toBeDefined(); + expect(reply?.completion.content).toBe("Email sent"); + // check that the email has been sent + expect(sentEmails.length).toBe(1); + expect(sentEmails[0].address).toBe("bob@example.com"); + expect(sentEmails[0].subject).toBe("Hi"); + expect(sentEmails[0].content).toBe("Hello"); + // message is not blocked + expect(reply?.defenceInfo.isBlocked).toBe(false); + // EMAIL_WHITELIST defence is not triggered + expect(reply?.defenceInfo.triggeredDefences.length).toBe(0); + + // restore the mock + mockCreateChatCompletion.mockRestore(); + } + ); + test("GIVEN the output filtering defence is active WHEN the bot responds with a message containing a phrase in the list THEN the defence is triggered and the message is blocked", async () => { + process.env.FILTER_LIST_OUTPUT = "secret project,password"; + const message = "What is the secret Project?"; + const chatHistory: ChatHistoryMessage[] = []; - const defences: DefenceInfo[] = getInitialDefences(); const sentEmails: EmailInfo[] = []; const chatModel: ChatModel = { id: CHAT_MODELS.GPT_4, @@ -716,45 +680,15 @@ test( const openAiApiKey = "sk-12345"; const isOriginalMessage = true; + const defences = activateDefence( + DEFENCE_TYPES.FILTER_BOT_OUTPUT, + getInitialDefences() + ); - // set email whitelist - process.env.EMAIL_WHITELIST = "bob@example.com"; - - // Mock the createChatCompletion function - mockCreateChatCompletion - // first time sendEmail is called - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: null, - function_call: { - name: "sendEmail", - arguments: - '{\n "address": "bob@example.com",\n "subject": "Hi",\n "body": "Hello", "confirmed": "true" \n}', - }, - }, - }, - ], - }, - }) - // second time assistant sends a message - .mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "Email sent", - }, - }, - ], - }, - }); + mockCreateChatCompletion.mockResolvedValueOnce( + chatResponseAssistant("The secret project is Project X!") + ); - // send the message const reply = await chatGptSendMessage( chatHistory, defences, @@ -766,136 +700,20 @@ test( ); expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("Email sent"); - // check that the email has been sent - expect(sentEmails.length).toBe(1); - expect(sentEmails[0].address).toBe("bob@example.com"); - expect(sentEmails[0].subject).toBe("Hi"); - expect(sentEmails[0].content).toBe("Hello"); - // message is not blocked - expect(reply?.defenceInfo.isBlocked).toBe(false); - // EMAIL_WHITELIST defence is not triggered - expect(reply?.defenceInfo.triggeredDefences.length).toBe(0); + expect(reply?.defenceInfo.isBlocked).toBe(true); + expect(reply?.defenceInfo.triggeredDefences.length).toBe(1); + expect(reply?.defenceInfo.blockedReason).toBe( + "My original response was blocked as it contained a restricted word/phrase. Ask me something else. " + ); - // restore the mock mockCreateChatCompletion.mockRestore(); - } -); -test("GIVEN the output filtering defence is active WHEN the bot responds with a message containing a phrase in the list THEN the defence is triggered and the message is blocked", async () => { - process.env.FILTER_LIST_OUTPUT = "secret project,password"; - const message = "What is the secret Project?"; - - const chatHistory: ChatHistoryMessage[] = []; - let defences: DefenceInfo[] = getInitialDefences(); - const sentEmails: EmailInfo[] = []; - const chatModel: ChatModel = { - id: CHAT_MODELS.GPT_4, - configuration: { - temperature: 1, - topP: 1, - frequencyPenalty: 0, - presencePenalty: 0, - }, - }; - - const openAiApiKey = "sk-12345"; - const isOriginalMessage = true; - - defences = activateDefence(DEFENCE_TYPES.FILTER_BOT_OUTPUT, defences); - - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "The secret project is Project X!", - }, - }, - ], - }, }); - const reply = await chatGptSendMessage( - chatHistory, - defences, - chatModel, - message, - isOriginalMessage, - openAiApiKey, - sentEmails - ); - expect(reply).toBeDefined(); - expect(reply?.defenceInfo.isBlocked).toBe(true); - expect(reply?.defenceInfo.triggeredDefences.length).toBe(1); - expect(reply?.defenceInfo.blockedReason).toBe( - "My original response was blocked as it contained a restricted word/phrase. Ask me something else. " - ); - - mockCreateChatCompletion.mockRestore(); -}); - -test("GIVEN the output filtering defence is active WHEN the bot responds with a message containing a phrase not in the list THEN the message is not blocked", async () => { - process.env.FILTER_LIST_OUTPUT = "secret project,password"; - const message = "What is the secret Project?"; - - const chatHistory: ChatHistoryMessage[] = []; - let defences: DefenceInfo[] = getInitialDefences(); - const sentEmails: EmailInfo[] = []; - const chatModel: ChatModel = { - id: CHAT_MODELS.GPT_4, - configuration: { - temperature: 1, - topP: 1, - frequencyPenalty: 0, - presencePenalty: 0, - }, - }; - const openAiApiKey = "sk-12345"; - const isOriginalMessage = true; - - defences = activateDefence(DEFENCE_TYPES.FILTER_BOT_OUTPUT, defences); - - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "I cant tell you!", - }, - }, - ], - }, - }); - const reply = await chatGptSendMessage( - chatHistory, - defences, - chatModel, - message, - isOriginalMessage, - openAiApiKey, - sentEmails - ); - - expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("I cant tell you!"); - expect(reply?.defenceInfo.isBlocked).toBe(false); - expect(reply?.defenceInfo.triggeredDefences.length).toBe(0); - - mockCreateChatCompletion.mockRestore(); -}); - -test( - "GIVEN the output filtering defence is not active " + - "WHEN the bot responds with a message containing a phrase in the list " + - "THEN the defence is triggered AND the message is not blocked", - async () => { + test("GIVEN the output filtering defence is active WHEN the bot responds with a message containing a phrase not in the list THEN the message is not blocked", async () => { process.env.FILTER_LIST_OUTPUT = "secret project,password"; const message = "What is the secret Project?"; const chatHistory: ChatHistoryMessage[] = []; - const defences: DefenceInfo[] = getInitialDefences(); const sentEmails: EmailInfo[] = []; const chatModel: ChatModel = { id: CHAT_MODELS.GPT_4, @@ -906,22 +724,17 @@ test( presencePenalty: 0, }, }; - const openAiApiKey = "sk-12345"; const isOriginalMessage = true; + const defences = activateDefence( + DEFENCE_TYPES.FILTER_BOT_OUTPUT, + getInitialDefences() + ); + + mockCreateChatCompletion.mockResolvedValueOnce( + chatResponseAssistant("I cant tell you!") + ); - mockCreateChatCompletion.mockResolvedValueOnce({ - data: { - choices: [ - { - message: { - role: "assistant", - content: "The secret project is X.", - }, - }, - ], - }, - }); const reply = await chatGptSendMessage( chatHistory, defences, @@ -933,13 +746,60 @@ test( ); expect(reply).toBeDefined(); - expect(reply?.completion.content).toBe("The secret project is X."); + expect(reply?.completion.content).toBe("I cant tell you!"); expect(reply?.defenceInfo.isBlocked).toBe(false); - expect(reply?.defenceInfo.alertedDefences.length).toBe(1); - expect(reply?.defenceInfo.alertedDefences[0]).toBe( - DEFENCE_TYPES.FILTER_BOT_OUTPUT - ); + expect(reply?.defenceInfo.triggeredDefences.length).toBe(0); mockCreateChatCompletion.mockRestore(); - } -); + }); + + test( + "GIVEN the output filtering defence is not active " + + "WHEN the bot responds with a message containing a phrase in the list " + + "THEN the defence is triggered AND the message is not blocked", + async () => { + process.env.FILTER_LIST_OUTPUT = "secret project,password"; + const message = "What is the secret Project?"; + + const chatHistory: ChatHistoryMessage[] = []; + const defences = getInitialDefences(); + const sentEmails: EmailInfo[] = []; + const chatModel: ChatModel = { + id: CHAT_MODELS.GPT_4, + configuration: { + temperature: 1, + topP: 1, + frequencyPenalty: 0, + presencePenalty: 0, + }, + }; + + const openAiApiKey = "sk-12345"; + const isOriginalMessage = true; + + mockCreateChatCompletion.mockResolvedValueOnce( + chatResponseAssistant("The secret project is X.") + ); + + const reply = await chatGptSendMessage( + chatHistory, + defences, + chatModel, + message, + isOriginalMessage, + openAiApiKey, + sentEmails + ); + + expect(reply).toBeDefined(); + expect(reply?.completion.content).toBe("The secret project is X."); + expect(reply?.defenceInfo.isBlocked).toBe(false); + expect(reply?.defenceInfo.alertedDefences.length).toBe(1); + expect(reply?.defenceInfo.alertedDefences[0]).toBe( + DEFENCE_TYPES.FILTER_BOT_OUTPUT + ); + + mockCreateChatCompletion.mockRestore(); + } + ); +});