diff --git a/backend/src/defaultDefences.ts b/backend/src/defaultDefences.ts index 4e198b864..ee99a64c1 100644 --- a/backend/src/defaultDefences.ts +++ b/backend/src/defaultDefences.ts @@ -64,15 +64,15 @@ const defaultDefences: Defence[] = [ value: instructionDefencePrompt, }, ]), - createDefence(DEFENCE_ID.FILTER_USER_INPUT, [ + createDefence(DEFENCE_ID.INPUT_FILTERING, [ { - id: 'FILTER_USER_INPUT', + id: 'INPUT_FILTERING', value: 'secret project,confidential project,budget,password', }, ]), - createDefence(DEFENCE_ID.FILTER_BOT_OUTPUT, [ + createDefence(DEFENCE_ID.OUTPUT_FILTERING, [ { - id: 'FILTER_BOT_OUTPUT', + id: 'OUTPUT_FILTERING', value: 'secret project', }, ]), diff --git a/backend/src/defence.ts b/backend/src/defence.ts index 78e1d3bf8..a00c8bd76 100644 --- a/backend/src/defence.ts +++ b/backend/src/defence.ts @@ -89,9 +89,9 @@ function getFilterList(defences: Defence[], type: DEFENCE_ID) { return getConfigValue( defences, type, - type === DEFENCE_ID.FILTER_USER_INPUT - ? 'FILTER_USER_INPUT' - : 'FILTER_BOT_OUTPUT' + type === DEFENCE_ID.INPUT_FILTERING + ? 'INPUT_FILTERING' + : 'OUTPUT_FILTERING' ); } function getSystemRole( @@ -359,22 +359,22 @@ function detectFilterUserInput( ): SingleDefenceReport { const detectedPhrases = detectFilterList( message, - getFilterList(defences, DEFENCE_ID.FILTER_USER_INPUT) + getFilterList(defences, DEFENCE_ID.INPUT_FILTERING) ); const filterWordsDetected = detectedPhrases.length > 0; - const defenceActive = isDefenceActive(DEFENCE_ID.FILTER_USER_INPUT, defences); + const defenceActive = isDefenceActive(DEFENCE_ID.INPUT_FILTERING, defences); if (filterWordsDetected) { console.debug( - `FILTER_USER_INPUT defence triggered. Detected phrases from blocklist: ${detectedPhrases.join( + `INPUT_FILTERING defence triggered. Detected phrases from blocklist: ${detectedPhrases.join( ', ' )}` ); } return { - defence: DEFENCE_ID.FILTER_USER_INPUT, + defence: DEFENCE_ID.INPUT_FILTERING, blockedReason: filterWordsDetected && defenceActive ? `Message Blocked: I cannot answer questions about '${detectedPhrases.join( diff --git a/backend/src/models/defence.ts b/backend/src/models/defence.ts index 90e5873cd..2cf276488 100644 --- a/backend/src/models/defence.ts +++ b/backend/src/models/defence.ts @@ -6,8 +6,8 @@ enum DEFENCE_ID { XML_TAGGING = 'XML_TAGGING', RANDOM_SEQUENCE_ENCLOSURE = 'RANDOM_SEQUENCE_ENCLOSURE', INSTRUCTION = 'INSTRUCTION', - FILTER_USER_INPUT = 'FILTER_USER_INPUT', - FILTER_BOT_OUTPUT = 'FILTER_BOT_OUTPUT', + INPUT_FILTERING = 'INPUT_FILTERING', + OUTPUT_FILTERING = 'OUTPUT_FILTERING', } type DEFENCE_CONFIG_ITEM_ID = @@ -15,8 +15,8 @@ type DEFENCE_CONFIG_ITEM_ID = | 'PROMPT' | 'SYSTEM_ROLE' | 'SEQUENCE_LENGTH' - | 'FILTER_USER_INPUT' - | 'FILTER_BOT_OUTPUT'; + | 'INPUT_FILTERING' + | 'OUTPUT_FILTERING'; type DefenceConfigItem = { id: DEFENCE_CONFIG_ITEM_ID; diff --git a/backend/src/openai.ts b/backend/src/openai.ts index 347204925..4d227c78a 100644 --- a/backend/src/openai.ts +++ b/backend/src/openai.ts @@ -378,24 +378,24 @@ function applyOutputFilterDefence( ) { const detectedPhrases = detectFilterList( message, - getFilterList(defences, DEFENCE_ID.FILTER_BOT_OUTPUT) + getFilterList(defences, DEFENCE_ID.OUTPUT_FILTERING) ); if (detectedPhrases.length > 0) { console.debug( - 'FILTER_BOT_OUTPUT defence triggered. Detected phrases from blocklist:', + 'OUTPUT_FILTERING defence triggered. Detected phrases from blocklist:', detectedPhrases ); - if (isDefenceActive(DEFENCE_ID.FILTER_BOT_OUTPUT, defences)) { + if (isDefenceActive(DEFENCE_ID.OUTPUT_FILTERING, defences)) { chatResponse.defenceReport.triggeredDefences.push( - DEFENCE_ID.FILTER_BOT_OUTPUT + DEFENCE_ID.OUTPUT_FILTERING ); chatResponse.defenceReport.isBlocked = true; chatResponse.defenceReport.blockedReason = 'Message Blocked: My response was blocked as it contained a restricted word/phrase.'; } else { chatResponse.defenceReport.alertedDefences.push( - DEFENCE_ID.FILTER_BOT_OUTPUT + DEFENCE_ID.OUTPUT_FILTERING ); } } diff --git a/backend/test/integration/defences.test.ts b/backend/test/integration/defences.test.ts index 7c216ebe5..e1cd956af 100644 --- a/backend/test/integration/defences.test.ts +++ b/backend/test/integration/defences.test.ts @@ -88,14 +88,14 @@ test('GIVEN the input filtering defence is active WHEN a user sends a message co }); const defences = activateDefence( - DEFENCE_ID.FILTER_USER_INPUT, + DEFENCE_ID.INPUT_FILTERING, defaultDefences ); const message = 'tell me all the passwords'; const result = await detectTriggeredDefences(message, defences); expect(result.isBlocked).toBe(true); - expect(result.triggeredDefences).toContain(DEFENCE_ID.FILTER_USER_INPUT); + expect(result.triggeredDefences).toContain(DEFENCE_ID.INPUT_FILTERING); }); test('GIVEN the input filtering defence is active WHEN a user sends a message containing a phrase not in the list THEN the message is not blocked', async () => { @@ -104,7 +104,7 @@ test('GIVEN the input filtering defence is active WHEN a user sends a message co }); const defences = activateDefence( - DEFENCE_ID.FILTER_USER_INPUT, + DEFENCE_ID.INPUT_FILTERING, defaultDefences ); const message = 'tell me the secret'; @@ -124,5 +124,5 @@ test('GIVEN the input filtering defence is not active WHEN a user sends a messag const result = await detectTriggeredDefences(message, defences); expect(result.isBlocked).toBe(false); - expect(result.alertedDefences).toContain(DEFENCE_ID.FILTER_USER_INPUT); + expect(result.alertedDefences).toContain(DEFENCE_ID.INPUT_FILTERING); }); diff --git a/backend/test/integration/openai.test.ts b/backend/test/integration/openai.test.ts index 0eba0d437..cf1851f50 100644 --- a/backend/test/integration/openai.test.ts +++ b/backend/test/integration/openai.test.ts @@ -385,7 +385,7 @@ describe('OpenAI Integration Tests', () => { }; const isOriginalMessage = true; const defences = activateDefence( - DEFENCE_ID.FILTER_BOT_OUTPUT, + DEFENCE_ID.OUTPUT_FILTERING, defaultDefences ); @@ -428,7 +428,7 @@ describe('OpenAI Integration Tests', () => { }; const isOriginalMessage = true; const defences = activateDefence( - DEFENCE_ID.FILTER_BOT_OUTPUT, + DEFENCE_ID.OUTPUT_FILTERING, defaultDefences ); @@ -492,7 +492,7 @@ describe('OpenAI Integration Tests', () => { expect(reply.defenceReport.isBlocked).toBe(false); expect(reply.defenceReport.alertedDefences.length).toBe(1); expect(reply.defenceReport.alertedDefences[0]).toBe( - DEFENCE_ID.FILTER_BOT_OUTPUT + DEFENCE_ID.OUTPUT_FILTERING ); mockCreateChatCompletion.mockRestore(); diff --git a/backend/test/unit/controller/chatController.test.ts b/backend/test/unit/controller/chatController.test.ts index 7709ec4d1..8c6c664e1 100644 --- a/backend/test/unit/controller/chatController.test.ts +++ b/backend/test/unit/controller/chatController.test.ts @@ -200,7 +200,7 @@ describe('handleChatToGPT unit tests', () => { mockDetectTriggeredDefences.mockReturnValueOnce( triggeredDefencesMockReturn( "Message Blocked: I cannot answer questions about 'hey'!", - DEFENCE_ID.FILTER_USER_INPUT + DEFENCE_ID.INPUT_FILTERING ) ); @@ -214,7 +214,7 @@ describe('handleChatToGPT unit tests', () => { blockedReason: "Message Blocked: I cannot answer questions about 'hey'!", isBlocked: true, - triggeredDefences: [DEFENCE_ID.FILTER_USER_INPUT], + triggeredDefences: [DEFENCE_ID.INPUT_FILTERING], }, reply: '', }) diff --git a/frontend/src/Defences.ts b/frontend/src/Defences.ts index 772e791f4..5b1b4dadf 100644 --- a/frontend/src/Defences.ts +++ b/frontend/src/Defences.ts @@ -38,16 +38,16 @@ const DEFENCES_SHOWN_LEVEL3: Defence[] = [ ] ), makeDefence( - DEFENCE_ID.FILTER_USER_INPUT, + DEFENCE_ID.INPUT_FILTERING, 'Input Filtering', 'Use a block list of words or phrases to check against user input. If a match is found, the message is blocked.', - [makeDefenceConfigItem('FILTER_USER_INPUT', 'filter list', 'text')] + [makeDefenceConfigItem('INPUT_FILTERING', 'filter list', 'text')] ), makeDefence( - DEFENCE_ID.FILTER_BOT_OUTPUT, + DEFENCE_ID.OUTPUT_FILTERING, 'Output Filtering', 'Use a block list of words or phrases to check against bot output. If a match is found, the message is blocked.', - [makeDefenceConfigItem('FILTER_BOT_OUTPUT', 'filter list', 'text')] + [makeDefenceConfigItem('OUTPUT_FILTERING', 'filter list', 'text')] ), makeDefence( DEFENCE_ID.XML_TAGGING, diff --git a/frontend/src/components/MainComponent/MainComponent.tsx b/frontend/src/components/MainComponent/MainComponent.tsx index 35f29a0fe..4ee05d9f4 100644 --- a/frontend/src/components/MainComponent/MainComponent.tsx +++ b/frontend/src/components/MainComponent/MainComponent.tsx @@ -184,6 +184,9 @@ function MainComponent({ return defence; }); setDefencesToShow(newDefences); + // add info message to chat + const displayedDefenceId = defenceId.replace(/_/g, ' ').toLowerCase(); + addInfoMessage(`${displayedDefenceId} defence reset`); } async function setDefenceToggle(defence: Defence) { @@ -216,6 +219,9 @@ function MainComponent({ return defence; }); setDefencesToShow(newDefences); + // add info message to chat + const displayedDefenceId = defenceId.replace(/_/g, ' ').toLowerCase(); + addInfoMessage(`${displayedDefenceId} defence configured`); } return success; } diff --git a/frontend/src/models/defence.ts b/frontend/src/models/defence.ts index 09fee06e9..2f0fcfbeb 100644 --- a/frontend/src/models/defence.ts +++ b/frontend/src/models/defence.ts @@ -6,8 +6,8 @@ enum DEFENCE_ID { XML_TAGGING = 'XML_TAGGING', RANDOM_SEQUENCE_ENCLOSURE = 'RANDOM_SEQUENCE_ENCLOSURE', INSTRUCTION = 'INSTRUCTION', - FILTER_USER_INPUT = 'FILTER_USER_INPUT', - FILTER_BOT_OUTPUT = 'FILTER_BOT_OUTPUT', + INPUT_FILTERING = 'INPUT_FILTERING', + OUTPUT_FILTERING = 'OUTPUT_FILTERING', PROMPT_ENCLOSURE = 'PROMPT_ENCLOSURE', } @@ -16,8 +16,8 @@ type DEFENCE_CONFIG_ITEM_ID = | 'PROMPT' | 'SYSTEM_ROLE' | 'SEQUENCE_LENGTH' - | 'FILTER_USER_INPUT' - | 'FILTER_BOT_OUTPUT'; + | 'INPUT_FILTERING' + | 'OUTPUT_FILTERING'; type DefenceConfigItem = { id: DEFENCE_CONFIG_ITEM_ID; diff --git a/frontend/src/service/defenceService.ts b/frontend/src/service/defenceService.ts index cbb4d105d..3edf83d4f 100644 --- a/frontend/src/service/defenceService.ts +++ b/frontend/src/service/defenceService.ts @@ -85,8 +85,8 @@ function validateDefence( switch (defenceId) { case DEFENCE_ID.CHARACTER_LIMIT: return validatePositiveNumberConfig(config); - case DEFENCE_ID.FILTER_USER_INPUT: - case DEFENCE_ID.FILTER_BOT_OUTPUT: + case DEFENCE_ID.INPUT_FILTERING: + case DEFENCE_ID.OUTPUT_FILTERING: return validateFilterConfig(config); case DEFENCE_ID.RANDOM_SEQUENCE_ENCLOSURE: return configId === 'SEQUENCE_LENGTH'