Misc tidying #837

Merged (30 commits, Feb 21, 2024). Changes shown are from 18 of the 30 commits.

Commits
All commits by pmarsh-scottlogic (messages verbatim):

54077bb · tidy chatGptSendMessage (Feb 1, 2024)
33e6512 · renames to getChatCompletionsInLimitedContextWindow1~ (Feb 1, 2024)
833132f · moves message property out of chatGptSendMessage (Feb 1, 2024)
40aa2dd · makes email decleration more concise (Feb 1, 2024)
e9cd877 · updates some comments (Feb 1, 2024)
7e77988 · more coment juggling (Feb 1, 2024)
74898dc · more comment sweepup (Feb 1, 2024)
b945b59 · moves handbook files into pages (Feb 1, 2024)
977a1bb · moves Attacks.ts into HandbookAttacks (Feb 1, 2024)
7283b2c · refactors and renames queryPromptEval and fixes tests (Feb 2, 2024)
fb48e1b · simplify output of queryDocuments (Feb 2, 2024)
63a88c0 · rename evaluatePrompt (Feb 2, 2024)
541c331 · removes object wrapping around simple strings (Feb 2, 2024)
7cfca12 · removes unused chatReponse property from ToolCallResponse (Feb 2, 2024)
00036bc · return initialised eval chain instead of assigning to variable first (Feb 2, 2024)
4874b0d · merge dev (Feb 15, 2024)
3a65b9d · finish merge (Feb 15, 2024)
45d7713 · add user message console log to handle chat without defences (Feb 15, 2024)
426f214 · stops some instance in openai.ts where things were declared to be ins… (Feb 15, 2024)
2503b65 · improves a comment (Feb 16, 2024)
1901912 · improve error message (Feb 16, 2024)
0b96563 · adds link in comment to context window page (Feb 19, 2024)
d73c086 · renames method to getChatCompletionsInContextWindow (Feb 19, 2024)
241d42c · replace reduce with filter and map (Feb 19, 2024)
dff646f · move openAI instantiation back outside do while loop (Feb 19, 2024)
e0c44ff · update imports to handbook pages (Feb 19, 2024)
496c30f · removes some more unhelpful comments (Feb 19, 2024)
0d36671 · renames ChatDefenceReport to DefenceReport (Feb 19, 2024)
f189429 · remove more unhelpful comments (Feb 21, 2024)
ef3c567 · fixes typo (Feb 21, 2024)
23 changes: 13 additions & 10 deletions backend/src/controller/chatController.ts
@@ -100,17 +100,17 @@ async function handleChatWithoutDefenceDetection(
chatHistory: ChatMessage[],
defences: Defence[]
): Promise<LevelHandlerResponse> {
console.log(`User message: '${message}'`);

const updatedChatHistory = createNewUserMessages(message).reduce(
pushMessageToHistory,
chatHistory
);

// get the chatGPT reply
const openAiReply = await chatGptSendMessage(
updatedChatHistory,
defences,
chatModel,
message,
currentLevel
);

@@ -146,11 +146,16 @@ async function handleChatWithDefenceDetection(
defences
);

console.log(
`User message: '${
messageTransformation?.transformedMessageCombined ?? message
}'`
);

const openAiReplyPromise = chatGptSendMessage(
chatHistoryWithNewUserMessages,
defences,
chatModel,
messageTransformation?.transformedMessageCombined ?? message,
currentLevel
);

@@ -196,7 +201,6 @@
}

async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
// set reply params
const initChatResponse: ChatHttpResponse = {
reply: '',
defenceReport: {
@@ -232,9 +236,6 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
);
return;
}
const totalSentEmails: EmailInfo[] = [
...req.session.levelState[currentLevel].sentEmails,
];

// use default model for levels, allow user to select in sandbox
const chatModel =
@@ -283,15 +284,18 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
}

let updatedChatHistory = levelResult.chatHistory;
totalSentEmails.push(...levelResult.chatResponse.sentEmails);

const totalSentEmails: EmailInfo[] = [
[Review comment, pmarsh-scottlogic (Contributor, Author)]: Before, this was being instantiated and then mutated once to add on the new emails sent as a result of the message. Now we just instantiate it once, including the new emails. (A sketch of this pattern follows the diff below.)

...req.session.levelState[currentLevel].sentEmails,
...levelResult.chatResponse.sentEmails,
];

const updatedChatResponse: ChatHttpResponse = {
...initChatResponse,
...levelResult.chatResponse,
};

if (updatedChatResponse.defenceReport.isBlocked) {
// chatReponse.reply is empty if blocked
updatedChatHistory = pushMessageToHistory(updatedChatHistory, {
chatMessageType: 'BOT_BLOCKED',
infoMessage:
Expand Down Expand Up @@ -326,7 +330,6 @@ async function handleChatToGPT(req: OpenAiChatRequest, res: Response) {
});
}

// update state
req.session.levelState[currentLevel].chatHistory = updatedChatHistory;
req.session.levelState[currentLevel].sentEmails = totalSentEmails;

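To make the inline review comment above concrete, here is the pattern in isolation: a minimal, self-contained TypeScript sketch. The `EmailInfo` shape is reduced to a single field, and `previouslySentEmails` and `newlySentEmails` are hypothetical stand-ins for the session state and the level result, so this illustrates the refactor rather than reproducing the controller.

```typescript
// Stand-in type: the real EmailInfo interface has more fields than this.
interface EmailInfo {
	subject: string;
}

// Hypothetical stand-ins for req.session.levelState[currentLevel].sentEmails
// and levelResult.chatResponse.sentEmails.
const previouslySentEmails: EmailInfo[] = [{ subject: 'quarterly report' }];
const newlySentEmails: EmailInfo[] = [{ subject: 'meeting notes' }];

// Before: copy the existing state into a new array, then mutate it once
// to append the emails sent as a result of the current message.
const totalSentEmailsBefore: EmailInfo[] = [...previouslySentEmails];
totalSentEmailsBefore.push(...newlySentEmails);

// After: build the array in a single expression, spreading both sources.
const totalSentEmails: EmailInfo[] = [
	...previouslySentEmails,
	...newlySentEmails,
];

// Both arrays hold the same two emails; the second version never mutates.
console.log(totalSentEmails.map((email) => email.subject));
// ['quarterly report', 'meeting notes']
```

Building the array at its final value in one expression also means the `const` binding tells the whole story: a reader no longer has to scan ahead for a later `push`.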
6 changes: 3 additions & 3 deletions backend/src/defence.ts
@@ -1,5 +1,5 @@
import { defaultDefences } from './defaultDefences';
import { queryPromptEvaluationModel } from './langchain';
import { evaluatePrompt } from './langchain';
import {
ChatDefenceReport,
MessageTransformation,
@@ -454,12 +454,12 @@ async function detectEvaluationLLM(
if (isDefenceActive(DEFENCE_ID.PROMPT_EVALUATION_LLM, defences)) {
const promptEvalLLMPrompt = getPromptEvalPromptFromConfig(defences);

const evaluationResult = await queryPromptEvaluationModel(
const promptIsMalicious = await evaluatePrompt(
message,
promptEvalLLMPrompt
);

if (evaluationResult.isMalicious) {
if (promptIsMalicious) {
console.debug('LLM evaluation defence active and prompt is malicious.');

return {
5 changes: 2 additions & 3 deletions backend/src/document.ts
@@ -103,12 +103,11 @@ async function initDocumentVectors() {
);

// embed and store the splits - will use env variable for API key
const embeddings = new OpenAIEmbeddings();
const docVector = await MemoryVectorStore.fromDocuments(
commonAndLevelDocuments,
embeddings
new OpenAIEmbeddings()
);
// store the document vectors for the level

docVectors.push({
level,
docVector,
56 changes: 20 additions & 36 deletions backend/src/langchain.ts
@@ -4,7 +4,7 @@ import { OpenAI } from 'langchain/llms/openai';
import { PromptTemplate } from 'langchain/prompts';

import { getDocumentVectors } from './document';
import { CHAT_MODELS, ChatAnswer } from './models/chat';
import { CHAT_MODELS } from './models/chat';
import { PromptEvaluationChainReply, QaChainReply } from './models/langchain';
import { LEVEL_NAMES } from './models/level';
import { getOpenAIKey, getValidOpenAIModelsList } from './openai';
@@ -23,7 +23,6 @@ function makePromptTemplate(
templateNameForLogging: string
): PromptTemplate {
if (!configPrompt) {
// use the default Prompt
configPrompt = defaultPrompt;
}
const fullPrompt = `${configPrompt}\n${mainPrompt}`;
@@ -40,10 +39,8 @@ function getChatModel() {
function initQAModel(level: LEVEL_NAMES, Prompt: string) {
const openAIApiKey = getOpenAIKey();
const documentVectors = getDocumentVectors()[level].docVector;
// use gpt-4 if avaliable to apiKey
const modelName = getChatModel();

// initialise model
const model = new ChatOpenAI({
modelName,
streaming: true,
@@ -63,7 +60,6 @@

function initPromptEvaluationModel(configPromptEvaluationPrompt: string) {
const openAIApiKey = getOpenAIKey();
// use gpt-4 if avaliable to apiKey
const modelName = getChatModel();

const promptEvalTemplate = makePromptTemplate(
@@ -79,87 +75,75 @@
openAIApiKey,
});

const chain = new LLMChain({
console.debug(`Prompt evaluation model initialised with model: ${modelName}`);

return new LLMChain({
llm,
prompt: promptEvalTemplate,
outputKey: 'promptEvalOutput',
});

console.debug(`Prompt evaluation model initialised with model: ${modelName}`);
return chain;
}

// ask the question and return models answer
async function queryDocuments(
question: string,
Prompt: string,
currentLevel: LEVEL_NAMES
) {
): Promise<string> {
try {
const qaChain = initQAModel(currentLevel, Prompt);

// get start time
const startTime = Date.now();
console.debug('Calling QA model...');
const response = (await qaChain.call({
query: question,
})) as QaChainReply;
// log the time taken
console.debug(`QA model call took ${Date.now() - startTime}ms`);

console.debug(`QA model call took ${Date.now() - startTime}ms`);
console.debug(`QA model response: ${response.text}`);
const result: ChatAnswer = {
reply: response.text,
questionAnswered: true,
[Review comment, pmarsh-scottlogic (Contributor, Author)]: We weren't using this questionAnswered value anywhere. (A sketch of the simplified return type follows the diff below.)

};
return result;

return response.text;
} catch (error) {
console.error('Error calling QA model: ', error);
return {
reply: 'I cannot answer that question right now.',
questionAnswered: false,
};
return 'I cannot answer that question right now.';
}
}

// ask LLM whether the prompt is malicious
async function queryPromptEvaluationModel(
input: string,
promptEvalPrompt: string
) {
async function evaluatePrompt(input: string, promptEvalPrompt: string) {
try {
console.debug(`Checking '${input}' for malicious prompts`);
const promptEvaluationChain = initPromptEvaluationModel(promptEvalPrompt);
// get start time
const startTime = Date.now();
console.debug('Calling prompt evaluation model...');

const response = (await promptEvaluationChain.call({
prompt: input,
})) as PromptEvaluationChainReply;
// log the time taken

console.debug(
`Prompt evaluation model call took ${Date.now() - startTime}ms`
);
const promptEvaluation = formatEvaluationOutput(response.promptEvalOutput);
const promptEvaluation = interpretEvaluationOutput(
response.promptEvalOutput
);
console.debug(`Prompt evaluation: ${JSON.stringify(promptEvaluation)}`);
return promptEvaluation;
} catch (error) {
console.error('Error calling prompt evaluation model: ', error);
return { isMalicious: false };
return false;
}
}

function formatEvaluationOutput(response: string) {
function interpretEvaluationOutput(response: string) {
// remove all non-alphanumeric characters
const cleanResponse = response.replace(/\W/g, '').toLowerCase();
if (cleanResponse === 'yes' || cleanResponse === 'no') {
return { isMalicious: cleanResponse === 'yes' };
return cleanResponse === 'yes';
} else {
console.debug(
`Did not get a valid response from the prompt evaluation model. Original response: ${response}`
);
return { isMalicious: false };
return false;
}
}

export { queryDocuments, queryPromptEvaluationModel };
export { queryDocuments, evaluatePrompt };
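As a companion to the review comment above about the unused `questionAnswered` flag, here is a minimal sketch of the before and after shapes of `queryDocuments`. The QA chain call is stubbed out with a plain async function, so this only illustrates the return-type change, not the real call to `qaChain.call()` and `response.text`.

```typescript
// Hypothetical stub standing in for the QA chain call.
async function stubModelCall(question: string): Promise<string> {
	return `stub answer to: ${question}`;
}

// Before: callers received this wrapper, but only ever read .reply.
interface ChatAnswer {
	reply: string;
	questionAnswered: boolean;
}

async function queryDocumentsBefore(question: string): Promise<ChatAnswer> {
	try {
		return { reply: await stubModelCall(question), questionAnswered: true };
	} catch {
		return {
			reply: 'I cannot answer that question right now.',
			questionAnswered: false,
		};
	}
}

// After: with the unused flag gone, a plain string is enough, and the
// error case collapses to a fallback string.
async function queryDocuments(question: string): Promise<string> {
	try {
		return await stubModelCall(question);
	} catch {
		return 'I cannot answer that question right now.';
	}
}

void (async () => {
	const before = await queryDocumentsBefore('What is in the handbook?');
	console.log(before.reply); // the .questionAnswered flag goes unread
	console.log(await queryDocuments('What is in the handbook?'));
})();
```

The same unwrapping shows up in `evaluatePrompt` above: returning a bare boolean instead of `{ isMalicious: boolean }` is what lets the caller in defence.ts test `if (promptIsMalicious)` directly.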
6 changes: 0 additions & 6 deletions backend/src/models/chat.ts
@@ -61,11 +61,6 @@ interface ToolCallResponse {
chatHistory: ChatMessage[];
}

interface ChatAnswer {
reply: string;
questionAnswered: boolean;
}

interface ChatMalicious {
isMalicious: boolean;
reason: string;
@@ -123,7 +118,6 @@ const defaultChatModel: ChatModel = {
};

export type {
ChatAnswer,
ChatDefenceReport,
ChatGptReply,
ChatMalicious,