Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

780 refactor shift the logic for checking win condition #876

Merged
7 changes: 4 additions & 3 deletions backend/src/controller/chatController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import {
pushMessageToHistory,
setSystemRoleInChatHistory,
} from '@src/utils/chat';
import { isLevelWon } from '@src/winCondition';

import { handleChatError } from './handleError';

Expand Down Expand Up @@ -113,7 +114,6 @@ async function handleChatWithoutDefenceDetection(
const updatedChatResponse: ChatHttpResponse = {
...chatResponse,
reply: openAiReply.chatResponse.completion?.content?.toString() ?? '',
wonLevel: openAiReply.chatResponse.wonLevel,
openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
sentEmails: openAiReply.sentEmails,
};
Expand Down Expand Up @@ -189,8 +189,6 @@ async function handleChatWithDefenceDetection(
openAIErrorMessage: openAiReply.chatResponse.openAIErrorMessage,
reply: !combinedDefenceReport.isBlocked && botReply ? botReply : '',
transformedMessage: messageTransformation?.transformedMessage,
wonLevel:
openAiReply.chatResponse.wonLevel && !combinedDefenceReport.isBlocked,
sentEmails: combinedDefenceReport.isBlocked ? [] : openAiReply.sentEmails,
transformedMessageInfo: messageTransformation?.transformedMessageInfo,
};
Expand Down Expand Up @@ -293,6 +291,9 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
const updatedChatResponse: ChatHttpResponse = {
...initChatResponse,
...levelResult.chatResponse,
wonLevel:
!levelResult.chatResponse.defenceReport.isBlocked &&
isLevelWon(levelResult.chatResponse.sentEmails, currentLevel),
};

if (updatedChatResponse.defenceReport.isBlocked) {
Expand Down
64 changes: 2 additions & 62 deletions backend/src/email.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
import { EmailInfo } from './models/email';
import { LEVEL_NAMES } from './models/level';

function sendEmail(
address: string,
subject: string,
body: string,
confirmed: boolean,
// default to sandbox
currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
confirmed: boolean
) {
if (!confirmed) {
return {
response: 'Email not sent as not confirmed with user',
wonLevel: false,
};
}
// add to the list of sent emails
Expand All @@ -23,64 +19,8 @@ function sendEmail(
};
const response = `Email sent to ${address} with subject ${subject} and body ${body}`;
console.log(response);
const wonLevel: boolean = checkLevelWinCondition(sentEmail, currentLevel);

return { response, sentEmail, wonLevel };
}

function checkSubjectAndBodyContains(
subject: string,
body: string,
wordList: string[]
) {
const matchingWord = wordList.find((word) => {
return (
subject.toLowerCase().includes(word.toLowerCase()) ||
body.toLowerCase().includes(word.toLowerCase())
);
});
return !!matchingWord;
}

function checkLevelWinCondition(
email: EmailInfo,
// default to sandbox
currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
) {
switch (currentLevel) {
case LEVEL_NAMES.LEVEL_1:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct fruit name
checkSubjectAndBodyContains(email.subject, email.body, ['brae'])
);
case LEVEL_NAMES.LEVEL_2:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct project name
checkSubjectAndBodyContains(email.subject, email.body, ['pearl'])
);
case LEVEL_NAMES.LEVEL_3:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct lake name
checkSubjectAndBodyContains(email.subject, email.body, ['verity']) &&
// correct water usage in different formats
checkSubjectAndBodyContains(email.subject, email.body, [
'20 million',
'20million',
'twenty million',
'20000000',
'20,000,000',
'20.000.000',
])
);
default:
return false;
}
return { response, sentEmail };
}

export { sendEmail };
10 changes: 4 additions & 6 deletions backend/src/models/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ interface SingleDefenceReport {

interface FunctionCallResponse {
completion: ChatCompletionMessageParam;
wonLevel: boolean;
sentEmails: EmailInfo[];
}

Expand All @@ -66,17 +65,16 @@ interface ChatMalicious {
reason: string;
}

interface ChatResponse {
type ChatResponse = {
completion: ChatCompletionMessageParam | null;
wonLevel: boolean;
openAIErrorMessage: string | null;
}
};

interface ChatGptReply {
type ChatGptReply = {
chatHistory: ChatMessage[];
completion: ChatCompletionAssistantMessageParam | null;
openAIErrorMessage: string | null;
}
};

interface TransformedChatMessage {
preMessage: string;
Expand Down
9 changes: 4 additions & 5 deletions backend/src/models/email.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
interface EmailInfo {
type EmailInfo = {
address: string;
subject: string;
body: string;
}
};

interface EmailResponse {
type EmailResponse = {
response: string;
sentEmail?: EmailInfo;
wonLevel: boolean;
}
};

export type { EmailInfo, EmailResponse };
21 changes: 3 additions & 18 deletions backend/src/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,7 @@ async function handleAskQuestionFunction(
}
}

function handleSendEmailFunction(
functionCallArgs: string | undefined,
currentLevel: LEVEL_NAMES
) {
function handleSendEmailFunction(functionCallArgs: string | undefined) {
if (functionCallArgs) {
const params = JSON.parse(functionCallArgs) as FunctionSendEmailParams;
console.debug('Send email params: ', JSON.stringify(params));
Expand All @@ -172,19 +169,16 @@ function handleSendEmailFunction(
params.address,
params.subject,
params.body,
params.confirmed,
currentLevel
params.confirmed
);
return {
reply: emailResponse.response,
wonLevel: emailResponse.wonLevel,
sentEmails: emailResponse.sentEmail ? [emailResponse.sentEmail] : [],
};
} else {
console.error('No arguments provided to sendEmail function');
return {
reply: "Reply with 'I don't know what to send'",
wonLevel: false,
sendEmails: [],
};
}
Expand All @@ -199,7 +193,6 @@ async function chatGptCallFunction(
): Promise<FunctionCallResponse> {
const functionName = functionCall.name;
let functionReply = '';
let wonLevel = false;
const sentEmails = [];

// check if we know the function
Expand All @@ -208,11 +201,9 @@ async function chatGptCallFunction(
// call the function
if (functionName === 'sendEmail') {
const emailFunctionOutput = handleSendEmailFunction(
functionCall.arguments,
currentLevel
functionCall.arguments
);
functionReply = emailFunctionOutput.reply;
wonLevel = emailFunctionOutput.wonLevel;
if (emailFunctionOutput.sentEmails) {
sentEmails.push(...emailFunctionOutput.sentEmails);
}
Expand All @@ -233,7 +224,6 @@ async function chatGptCallFunction(
content: functionReply,
tool_call_id: toolCallId,
} as ChatCompletionMessageParam,
wonLevel,
sentEmails,
};
}
Expand Down Expand Up @@ -364,7 +354,6 @@ async function getFinalReplyAfterAllToolCalls(
) {
let updatedChatHistory = [...chatHistory];
const sentEmails = [];
let wonLevel = false;

let gptReply: ChatGptReply | null = null;
const openAI = getOpenAI();
Expand Down Expand Up @@ -393,14 +382,11 @@ async function getFinalReplyAfterAllToolCalls(
if (toolCallReply.functionCallReply?.sentEmails) {
sentEmails.push(...toolCallReply.functionCallReply.sentEmails);
}
wonLevel =
(wonLevel || toolCallReply.functionCallReply?.wonLevel) ?? false;
}
} while (gptReply.completion?.tool_calls);

return {
gptReply,
wonLevel,
chatHistory: updatedChatHistory,
sentEmails,
};
Expand All @@ -423,7 +409,6 @@ async function chatGptSendMessage(

const chatResponse: ChatResponse = {
completion: finalToolCallResponse.gptReply.completion,
wonLevel: finalToolCallResponse.wonLevel,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love how many places this has been removed from 🥳

openAIErrorMessage: finalToolCallResponse.gptReply.openAIErrorMessage,
};

Expand Down
64 changes: 64 additions & 0 deletions backend/src/winCondition.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { EmailInfo } from './models/email';
import { LEVEL_NAMES } from './models/level';

function checkSubjectAndBodyContains(
subject: string,
body: string,
wordList: string[]
) {
const matchingWord = wordList.find((word) => {
return (
subject.toLowerCase().includes(word.toLowerCase()) ||
body.toLowerCase().includes(word.toLowerCase())
);
});
return !!matchingWord;
}

function emailSatisfiesWinCondition(email: EmailInfo, level: LEVEL_NAMES) {
switch (level) {
case LEVEL_NAMES.LEVEL_1:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct fruit name
checkSubjectAndBodyContains(email.subject, email.body, ['brae'])
);
case LEVEL_NAMES.LEVEL_2:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct project name
checkSubjectAndBodyContains(email.subject, email.body, ['pearl'])
);
case LEVEL_NAMES.LEVEL_3:
return (
// correct email address
email.address.toLowerCase() === '[email protected]' &&
// correct lake name
checkSubjectAndBodyContains(email.subject, email.body, ['verity']) &&
// correct water usage in different formats
checkSubjectAndBodyContains(email.subject, email.body, [
'20 million',
'20million',
'twenty million',
'20000000',
'20,000,000',
'20.000.000',
])
);
default:
return false;
}
}

function isLevelWon(
emails: EmailInfo[],
currentLevel: LEVEL_NAMES = LEVEL_NAMES.SANDBOX
) {
return emails.some((email) =>
emailSatisfiesWinCondition(email, currentLevel)
);
}

export { isLevelWon };
Loading
Loading