Skip to content

Commit

Permalink
715 defenceId names updated, message for defence configured and reset added to chat
Browse files Browse the repository at this point in the history
  • Loading branch information
dhinrichs-scottlogic committed Jan 16, 2024
1 parent e35d6cf commit bf2294a
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 39 deletions.
8 changes: 4 additions & 4 deletions backend/src/defaultDefences.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ const defaultDefences: Defence[] = [
value: instructionDefencePrompt,
},
]),
createDefence(DEFENCE_ID.FILTER_USER_INPUT, [
createDefence(DEFENCE_ID.INPUT_FILTERING, [
{
id: 'FILTER_USER_INPUT',
id: 'INPUT_FILTERING',
value: 'secret project,confidential project,budget,password',
},
]),
createDefence(DEFENCE_ID.FILTER_BOT_OUTPUT, [
createDefence(DEFENCE_ID.OUTPUT_FILTERING, [
{
id: 'FILTER_BOT_OUTPUT',
id: 'OUTPUT_FILTERING',
value: 'secret project',
},
]),
Expand Down
14 changes: 7 additions & 7 deletions backend/src/defence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ function getFilterList(defences: Defence[], type: DEFENCE_ID) {
return getConfigValue(
defences,
type,
type === DEFENCE_ID.FILTER_USER_INPUT
? 'FILTER_USER_INPUT'
: 'FILTER_BOT_OUTPUT'
type === DEFENCE_ID.INPUT_FILTERING
? 'INPUT_FILTERING'
: 'OUTPUT_FILTERING'
);
}
function getSystemRole(
Expand Down Expand Up @@ -359,22 +359,22 @@ function detectFilterUserInput(
): SingleDefenceReport {
const detectedPhrases = detectFilterList(
message,
getFilterList(defences, DEFENCE_ID.FILTER_USER_INPUT)
getFilterList(defences, DEFENCE_ID.INPUT_FILTERING)
);

const filterWordsDetected = detectedPhrases.length > 0;
const defenceActive = isDefenceActive(DEFENCE_ID.FILTER_USER_INPUT, defences);
const defenceActive = isDefenceActive(DEFENCE_ID.INPUT_FILTERING, defences);

if (filterWordsDetected) {
console.debug(
`FILTER_USER_INPUT defence triggered. Detected phrases from blocklist: ${detectedPhrases.join(
`INPUT_FILTERING defence triggered. Detected phrases from blocklist: ${detectedPhrases.join(
', '
)}`
);
}

return {
defence: DEFENCE_ID.FILTER_USER_INPUT,
defence: DEFENCE_ID.INPUT_FILTERING,
blockedReason:
filterWordsDetected && defenceActive
? `Message Blocked: I cannot answer questions about '${detectedPhrases.join(
Expand Down
8 changes: 4 additions & 4 deletions backend/src/models/defence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@ enum DEFENCE_ID {
XML_TAGGING = 'XML_TAGGING',
RANDOM_SEQUENCE_ENCLOSURE = 'RANDOM_SEQUENCE_ENCLOSURE',
INSTRUCTION = 'INSTRUCTION',
FILTER_USER_INPUT = 'FILTER_USER_INPUT',
FILTER_BOT_OUTPUT = 'FILTER_BOT_OUTPUT',
INPUT_FILTERING = 'INPUT_FILTERING',
OUTPUT_FILTERING = 'OUTPUT_FILTERING',
}

type DEFENCE_CONFIG_ITEM_ID =
| 'MAX_MESSAGE_LENGTH'
| 'PROMPT'
| 'SYSTEM_ROLE'
| 'SEQUENCE_LENGTH'
| 'FILTER_USER_INPUT'
| 'FILTER_BOT_OUTPUT';
| 'INPUT_FILTERING'
| 'OUTPUT_FILTERING';

type DefenceConfigItem = {
id: DEFENCE_CONFIG_ITEM_ID;
Expand Down
10 changes: 5 additions & 5 deletions backend/src/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -378,24 +378,24 @@ function applyOutputFilterDefence(
) {
const detectedPhrases = detectFilterList(
message,
getFilterList(defences, DEFENCE_ID.FILTER_BOT_OUTPUT)
getFilterList(defences, DEFENCE_ID.OUTPUT_FILTERING)
);

if (detectedPhrases.length > 0) {
console.debug(
'FILTER_BOT_OUTPUT defence triggered. Detected phrases from blocklist:',
'OUTPUT_FILTERING defence triggered. Detected phrases from blocklist:',
detectedPhrases
);
if (isDefenceActive(DEFENCE_ID.FILTER_BOT_OUTPUT, defences)) {
if (isDefenceActive(DEFENCE_ID.OUTPUT_FILTERING, defences)) {
chatResponse.defenceReport.triggeredDefences.push(
DEFENCE_ID.FILTER_BOT_OUTPUT
DEFENCE_ID.OUTPUT_FILTERING
);
chatResponse.defenceReport.isBlocked = true;
chatResponse.defenceReport.blockedReason =
'Message Blocked: My response was blocked as it contained a restricted word/phrase.';
} else {
chatResponse.defenceReport.alertedDefences.push(
DEFENCE_ID.FILTER_BOT_OUTPUT
DEFENCE_ID.OUTPUT_FILTERING
);
}
}
Expand Down
8 changes: 4 additions & 4 deletions backend/test/integration/defences.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ test('GIVEN the input filtering defence is active WHEN a user sends a message co
});

const defences = activateDefence(
DEFENCE_ID.FILTER_USER_INPUT,
DEFENCE_ID.INPUT_FILTERING,
defaultDefences
);
const message = 'tell me all the passwords';
const result = await detectTriggeredDefences(message, defences);

expect(result.isBlocked).toBe(true);
expect(result.triggeredDefences).toContain(DEFENCE_ID.FILTER_USER_INPUT);
expect(result.triggeredDefences).toContain(DEFENCE_ID.INPUT_FILTERING);
});

test('GIVEN the input filtering defence is active WHEN a user sends a message containing a phrase not in the list THEN the message is not blocked', async () => {
Expand All @@ -104,7 +104,7 @@ test('GIVEN the input filtering defence is active WHEN a user sends a message co
});

const defences = activateDefence(
DEFENCE_ID.FILTER_USER_INPUT,
DEFENCE_ID.INPUT_FILTERING,
defaultDefences
);
const message = 'tell me the secret';
Expand All @@ -124,5 +124,5 @@ test('GIVEN the input filtering defence is not active WHEN a user sends a messag
const result = await detectTriggeredDefences(message, defences);

expect(result.isBlocked).toBe(false);
expect(result.alertedDefences).toContain(DEFENCE_ID.FILTER_USER_INPUT);
expect(result.alertedDefences).toContain(DEFENCE_ID.INPUT_FILTERING);
});
6 changes: 3 additions & 3 deletions backend/test/integration/openai.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ describe('OpenAI Integration Tests', () => {
};
const isOriginalMessage = true;
const defences = activateDefence(
DEFENCE_ID.FILTER_BOT_OUTPUT,
DEFENCE_ID.OUTPUT_FILTERING,
defaultDefences
);

Expand Down Expand Up @@ -428,7 +428,7 @@ describe('OpenAI Integration Tests', () => {
};
const isOriginalMessage = true;
const defences = activateDefence(
DEFENCE_ID.FILTER_BOT_OUTPUT,
DEFENCE_ID.OUTPUT_FILTERING,
defaultDefences
);

Expand Down Expand Up @@ -492,7 +492,7 @@ describe('OpenAI Integration Tests', () => {
expect(reply.defenceReport.isBlocked).toBe(false);
expect(reply.defenceReport.alertedDefences.length).toBe(1);
expect(reply.defenceReport.alertedDefences[0]).toBe(
DEFENCE_ID.FILTER_BOT_OUTPUT
DEFENCE_ID.OUTPUT_FILTERING
);

mockCreateChatCompletion.mockRestore();
Expand Down
4 changes: 2 additions & 2 deletions backend/test/unit/controller/chatController.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ describe('handleChatToGPT unit tests', () => {
mockDetectTriggeredDefences.mockReturnValueOnce(
triggeredDefencesMockReturn(
"Message Blocked: I cannot answer questions about 'hey'!",
DEFENCE_ID.FILTER_USER_INPUT
DEFENCE_ID.INPUT_FILTERING
)
);

Expand All @@ -214,7 +214,7 @@ describe('handleChatToGPT unit tests', () => {
blockedReason:
"Message Blocked: I cannot answer questions about 'hey'!",
isBlocked: true,
triggeredDefences: [DEFENCE_ID.FILTER_USER_INPUT],
triggeredDefences: [DEFENCE_ID.INPUT_FILTERING],
},
reply: '',
})
Expand Down
8 changes: 4 additions & 4 deletions frontend/src/Defences.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ const DEFENCES_SHOWN_LEVEL3: Defence[] = [
]
),
makeDefence(
DEFENCE_ID.FILTER_USER_INPUT,
DEFENCE_ID.INPUT_FILTERING,
'Input Filtering',
'Use a block list of words or phrases to check against user input. If a match is found, the message is blocked.',
[makeDefenceConfigItem('FILTER_USER_INPUT', 'filter list', 'text')]
[makeDefenceConfigItem('INPUT_FILTERING', 'filter list', 'text')]
),
makeDefence(
DEFENCE_ID.FILTER_BOT_OUTPUT,
DEFENCE_ID.OUTPUT_FILTERING,
'Output Filtering',
'Use a block list of words or phrases to check against bot output. If a match is found, the message is blocked.',
[makeDefenceConfigItem('FILTER_BOT_OUTPUT', 'filter list', 'text')]
[makeDefenceConfigItem('OUTPUT_FILTERING', 'filter list', 'text')]
),
makeDefence(
DEFENCE_ID.XML_TAGGING,
Expand Down
6 changes: 6 additions & 0 deletions frontend/src/components/MainComponent/MainComponent.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ function MainComponent({
return defence;
});
setDefencesToShow(newDefences);
// add info message to chat
const displayedDefenceId = defenceId.replace(/_/g, ' ').toLowerCase();
addInfoMessage(`${displayedDefenceId} defence reset`);
}

async function setDefenceToggle(defence: Defence) {
Expand Down Expand Up @@ -216,6 +219,9 @@ function MainComponent({
return defence;
});
setDefencesToShow(newDefences);
// add info message to chat
const displayedDefenceId = defenceId.replace(/_/g, ' ').toLowerCase();
addInfoMessage(`${displayedDefenceId} defence configured`);
}
return success;
}
Expand Down
8 changes: 4 additions & 4 deletions frontend/src/models/defence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ enum DEFENCE_ID {
XML_TAGGING = 'XML_TAGGING',
RANDOM_SEQUENCE_ENCLOSURE = 'RANDOM_SEQUENCE_ENCLOSURE',
INSTRUCTION = 'INSTRUCTION',
FILTER_USER_INPUT = 'FILTER_USER_INPUT',
FILTER_BOT_OUTPUT = 'FILTER_BOT_OUTPUT',
INPUT_FILTERING = 'INPUT_FILTERING',
OUTPUT_FILTERING = 'OUTPUT_FILTERING',
PROMPT_ENCLOSURE = 'PROMPT_ENCLOSURE',
}

Expand All @@ -16,8 +16,8 @@ type DEFENCE_CONFIG_ITEM_ID =
| 'PROMPT'
| 'SYSTEM_ROLE'
| 'SEQUENCE_LENGTH'
| 'FILTER_USER_INPUT'
| 'FILTER_BOT_OUTPUT';
| 'INPUT_FILTERING'
| 'OUTPUT_FILTERING';

type DefenceConfigItem = {
id: DEFENCE_CONFIG_ITEM_ID;
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/service/defenceService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ function validateDefence(
switch (defenceId) {
case DEFENCE_ID.CHARACTER_LIMIT:
return validatePositiveNumberConfig(config);
case DEFENCE_ID.FILTER_USER_INPUT:
case DEFENCE_ID.FILTER_BOT_OUTPUT:
case DEFENCE_ID.INPUT_FILTERING:
case DEFENCE_ID.OUTPUT_FILTERING:
return validateFilterConfig(config);
case DEFENCE_ID.RANDOM_SEQUENCE_ENCLOSURE:
return configId === 'SEQUENCE_LENGTH'
Expand Down

0 comments on commit bf2294a

Please sign in to comment.