From 8215fd0a6d0321da3c3cd8a3800d6cc0d1adc8a8 Mon Sep 17 00:00:00 2001 From: Chris Wilton-Magras Date: Mon, 5 Feb 2024 11:09:25 +0000 Subject: [PATCH] 810: Split session and non-session routes (#811) --- .gitattributes | 2 +- backend/.env.example | 1 + backend/Dockerfile | 2 +- backend/docker-compose.yml | 4 +- backend/package-lock.json | 28 +---- backend/package.json | 5 +- backend/src/app.ts | 80 ++----------- backend/src/document.ts | 3 +- backend/src/langchain.ts | 5 +- backend/src/models/chat.ts | 13 +- backend/src/nonSessionRoutes.ts | 21 ++++ backend/src/openai.ts | 2 +- backend/src/router.ts | 68 ----------- backend/src/sessionRoutes.ts | 133 +++++++++++++++++++++ backend/test/integration/langchain.test.ts | 52 ++++---- backend/tsconfig.json | 2 +- 16 files changed, 215 insertions(+), 206 deletions(-) create mode 100644 backend/src/nonSessionRoutes.ts delete mode 100644 backend/src/router.ts create mode 100644 backend/src/sessionRoutes.ts diff --git a/.gitattributes b/.gitattributes index a6731222a..111a7a617 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,5 @@ # Always use LF for line endings * text eol=lf -# Apart from font files +# Apart from font files and images *.[ot]tf binary *.png -text diff --git a/backend/.env.example b/backend/.env.example index 7f65c5b57..905ffccfb 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,2 +1,3 @@ OPENAI_API_KEY=YOUR_API_KEY SESSION_SECRET=YOUR_SESSION_SECRET +CORS_ALLOW_ORIGIN=http://localhost:5173 diff --git a/backend/Dockerfile b/backend/Dockerfile index 43405a575..ffe2f31a3 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -5,4 +5,4 @@ RUN npm ci COPY . . 
EXPOSE 3001 RUN npm run build -CMD ["node", "--import=tsx/esm", "./build/server.js"] +CMD ["node", "--import=tsx", "./src/server.ts"] diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 7a44ed0cb..cd26aedb5 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,8 +1,10 @@ services: prompt-injection-api: environment: - NODE_ENV: development + NODE_ENV: ${NODE_ENV:-development} OPENAI_API_KEY: ${OPENAI_API_KEY} + SESSION_SECRET: ${SESSION_SECRET} + CORS_ALLOW_ORIGIN: '${CORS_ALLOW_ORIGIN:-*}' PORT: 3001 build: . image: 'scottlogic/prompt-injection-api' diff --git a/backend/package-lock.json b/backend/package-lock.json index d0516a778..cdf0ea602 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -17,8 +17,7 @@ "memorystore": "^1.6.7", "openai": "^4.19.0", "openai-chat-tokens": "^0.2.8", - "pdf-parse": "^1.1.1", - "react": "^18.2.0" + "pdf-parse": "^1.1.1" }, "devDependencies": { "@jest/globals": "^29.7.0", @@ -5434,7 +5433,8 @@ "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true }, "node_modules/js-yaml": { "version": "4.1.0", @@ -5901,17 +5901,6 @@ "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, "node_modules/lru-cache": { "version": "5.1.1", "resolved": 
"https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -6908,17 +6897,6 @@ "node": ">= 0.8" } }, - "node_modules/react": { - "version": "18.2.0", - "resolved": "https://registry.npmjs.org/react/-/react-18.2.0.tgz", - "integrity": "sha512-/3IjMdb2L9QbBdWiW5e3P2/npwMBaU9mHCSCUzNln0ZCYbcfTsGbTJrU/kGemdH2IWmB2ioZ+zkxtmq6g09fGQ==", - "dependencies": { - "loose-envify": "^1.1.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/react-is": { "version": "18.2.0", "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", diff --git a/backend/package.json b/backend/package.json index cd1f231f1..10865e543 100644 --- a/backend/package.json +++ b/backend/package.json @@ -3,7 +3,7 @@ "version": "0.1.0", "type": "module", "scripts": { - "build": "tsc", + "build": "tsc --noEmit", "dev": "tsx watch -r dotenv/config src/server.ts", "start": "tsx -r dotenv/config src/server.ts", "docker:start": "docker compose up -d", @@ -23,8 +23,7 @@ "memorystore": "^1.6.7", "openai": "^4.19.0", "openai-chat-tokens": "^0.2.8", - "pdf-parse": "^1.1.1", - "react": "^18.2.0" + "pdf-parse": "^1.1.1" }, "devDependencies": { "@jest/globals": "^29.7.0", diff --git a/backend/src/app.ts b/backend/src/app.ts index 642eeef05..5a965a1ec 100644 --- a/backend/src/app.ts +++ b/backend/src/app.ts @@ -1,74 +1,16 @@ import cors from 'cors'; -import dotenv from 'dotenv'; import express from 'express'; -import session from 'express-session'; -import memoryStoreFactory from 'memorystore'; -import { fileURLToPath } from 'node:url'; -import { importMetaUrl } from './importMetaUtils'; -import { ChatModel, defaultChatModel } from './models/chat'; -import { LevelState, getInitialLevelStates } from './models/level'; -import { router } from './router'; +import nonSessionRoutes from './nonSessionRoutes'; +import sessionRoutes from './sessionRoutes'; -dotenv.config(); - -declare module 'express-session' { - interface Session { - initialised: boolean; - chatModel: ChatModel; - levelState: 
LevelState[]; - } -} - -const app = express(); -const isProd = app.get('env') === 'production'; - -// for parsing application/json -app.use(express.json()); - -// use session storage - currently in-memory, but in future use Redis in prod builds -const maxAge = 60 * 60 * 1000 * (isProd ? 1 : 8); //1 hour in prod, 8hrs in dev -const sessionOpts: session.SessionOptions = { - store: new (memoryStoreFactory(session))({ - checkPeriod: maxAge, - }), - secret: process.env.SESSION_SECRET ?? 'secret', - name: 'prompt-injection.sid', - resave: false, - saveUninitialized: true, - cookie: { - secure: isProd, - maxAge, - }, -}; - -app.use(session(sessionOpts)); - -app.use( - cors({ - credentials: true, - origin: true, - }) -); - -app.use((req, _res, next) => { - // initialise session variables first time - if (!req.session.initialised) { - req.session.chatModel = defaultChatModel; - req.session.levelState = getInitialLevelStates(); - req.session.initialised = true; - } - next(); -}); - -app.use('/', router); - -// serve the documents folder -app.use( - '/documents', - express.static( - fileURLToPath(new URL('../resources/documents', importMetaUrl())) +export default express() + .use(express.json()) + .use( + cors({ + origin: process.env.CORS_ALLOW_ORIGIN, + credentials: true, + }) ) -); - -export default app; + .use('/', nonSessionRoutes) + .use('/', sessionRoutes); diff --git a/backend/src/document.ts b/backend/src/document.ts index f8b32d0dc..3c10b6812 100644 --- a/backend/src/document.ts +++ b/backend/src/document.ts @@ -1,9 +1,9 @@ -import * as fs from 'fs'; import { CSVLoader } from 'langchain/document_loaders/fs/csv'; import { DirectoryLoader } from 'langchain/document_loaders/fs/directory'; import { PDFLoader } from 'langchain/document_loaders/fs/pdf'; import { TextLoader } from 'langchain/document_loaders/fs/text'; import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; +import * as fs from 'node:fs'; import { DocumentMeta } from './models/document'; 
import { LEVEL_NAMES } from './models/level'; @@ -28,6 +28,7 @@ async function getDocuments(filePath: string) { '.csv': (path: string) => new CSVLoader(path), }); const docs = await loader.load(); + console.debug(`${docs.length} documents found`); // split the documents into chunks const textSplitter = new RecursiveCharacterTextSplitter({ diff --git a/backend/src/langchain.ts b/backend/src/langchain.ts index 024f0aa7a..89d46e624 100644 --- a/backend/src/langchain.ts +++ b/backend/src/langchain.ts @@ -20,11 +20,12 @@ import { // store vectorised documents for each level as array const vectorisedDocuments = (() => { - let docs: DocumentsVector[] = []; + const docs: DocumentsVector[] = []; return { get: () => docs, set: (newDocs: DocumentsVector[]) => { - docs = newDocs; + while (docs.length > 0) docs.pop(); + docs.push(...newDocs); }, }; })(); diff --git a/backend/src/models/chat.ts b/backend/src/models/chat.ts index 5a29f05c0..69bbe1fb1 100644 --- a/backend/src/models/chat.ts +++ b/backend/src/models/chat.ts @@ -149,21 +149,16 @@ export type { ChatDefenceReport, ChatGptReply, ChatMalicious, + ChatModel, + ChatModelConfiguration, ChatResponse, LevelHandlerResponse, ChatHttpResponse, ChatHistoryMessage, + SingleDefenceReport, TransformedChatMessage, FunctionCallResponse, ToolCallResponse, MessageTransformation, }; -export { - CHAT_MODELS, - CHAT_MESSAGE_TYPE, - MODEL_CONFIG, - ChatModel, - ChatModelConfiguration, - defaultChatModel, - SingleDefenceReport, -}; +export { CHAT_MODELS, CHAT_MESSAGE_TYPE, MODEL_CONFIG, defaultChatModel }; diff --git a/backend/src/nonSessionRoutes.ts b/backend/src/nonSessionRoutes.ts new file mode 100644 index 000000000..cae39c5ee --- /dev/null +++ b/backend/src/nonSessionRoutes.ts @@ -0,0 +1,21 @@ +import express from 'express'; +import { fileURLToPath } from 'node:url'; + +import { handleGetDocuments } from './controller/documentController'; +import { handleHealthCheck } from './controller/healthController'; +import { handleGetSystemRoles 
} from './controller/systemRoleController'; +import { importMetaUrl } from './importMetaUtils'; + +const router = express.Router(); + +router.use( + '/documents', + express.static( + fileURLToPath(new URL('../resources/documents', importMetaUrl())) + ) +); +router.get('/documents', handleGetDocuments); +router.get('/health', handleHealthCheck); +router.get('/systemRoles', handleGetSystemRoles); + +export default router; diff --git a/backend/src/openai.ts b/backend/src/openai.ts index 680e231f6..e9eb06b87 100644 --- a/backend/src/openai.ts +++ b/backend/src/openai.ts @@ -101,7 +101,7 @@ const getOpenAIKey = (() => { openAIKey = process.env.OPENAI_API_KEY; if (!openAIKey) { throw new Error( - 'OpenAI API key not found in environment vars - cannot continue!' + 'OPENAI_API_KEY not found in environment vars, cannot continue!' ); } } diff --git a/backend/src/router.ts b/backend/src/router.ts deleted file mode 100644 index 695429dd3..000000000 --- a/backend/src/router.ts +++ /dev/null @@ -1,68 +0,0 @@ -import express from 'express'; - -import { - handleChatToGPT, - handleGetChatHistory, - handleAddToChatHistory, - handleClearChatHistory, -} from './controller/chatController'; -import { - handleConfigureDefence, - handleDefenceActivation, - handleDefenceDeactivation, - handleGetDefenceStatus, - handleResetSingleDefence, -} from './controller/defenceController'; -import { handleGetDocuments } from './controller/documentController'; -import { - handleClearEmails, - handleGetEmails, -} from './controller/emailController'; -import { handleHealthCheck } from './controller/healthController'; -import { - handleConfigureModel, - handleGetModel, - handleGetValidModels, - handleSetModel, -} from './controller/modelController'; -import { handleResetProgress } from './controller/resetController'; -import { handleGetSystemRoles } from './controller/systemRoleController'; - -const router = express.Router(); - -// health -router.get('/health', handleHealthCheck); - -// defences 
-router.post('/defence/activate', handleDefenceActivation); -router.post('/defence/deactivate', handleDefenceDeactivation); -router.post('/defence/configure', handleConfigureDefence); -router.post('/defence/resetConfig', handleResetSingleDefence); -router.get('/defence/status', handleGetDefenceStatus); - -// emails -router.get('/email/get', handleGetEmails); -router.post('/email/clear', handleClearEmails); - -// chat -router.post('/openai/chat', handleChatToGPT); -router.get('/openai/history', handleGetChatHistory); -router.post('/openai/addHistory', handleAddToChatHistory); -router.post('/openai/clear', handleClearChatHistory); - -// model configurations -router.post('/openai/model', handleSetModel); -router.post('/openai/model/configure', handleConfigureModel); -router.get('/openai/model', handleGetModel); -router.get('/openai/validModels', handleGetValidModels); - -// system roles -router.get('/systemRoles', handleGetSystemRoles); - -// getting documents -router.get('/documents', handleGetDocuments); - -// reset progress for all levels -router.post('/reset', handleResetProgress); - -export { router }; diff --git a/backend/src/sessionRoutes.ts b/backend/src/sessionRoutes.ts new file mode 100644 index 000000000..af9b648fa --- /dev/null +++ b/backend/src/sessionRoutes.ts @@ -0,0 +1,133 @@ +import 'dotenv/config'; +import express from 'express'; +import session from 'express-session'; +import memoryStoreFactory from 'memorystore'; + +import { + handleChatToGPT, + handleGetChatHistory, + handleAddToChatHistory, + handleClearChatHistory, +} from './controller/chatController'; +import { + handleConfigureDefence, + handleDefenceActivation, + handleDefenceDeactivation, + handleGetDefenceStatus, + handleResetSingleDefence, +} from './controller/defenceController'; +import { + handleClearEmails, + handleGetEmails, +} from './controller/emailController'; +import { + handleConfigureModel, + handleGetModel, + handleGetValidModels, + handleSetModel, +} from 
'./controller/modelController'; +import { handleResetProgress } from './controller/resetController'; +import { ChatModel, defaultChatModel } from './models/chat'; +import { LevelState, getInitialLevelStates } from './models/level'; + +declare module 'express-session' { + interface Session { + initialised: boolean; + chatModel: ChatModel; + levelState: LevelState[]; + } +} + +const sessionSigningSecret = process.env.SESSION_SECRET; +if (!sessionSigningSecret) { + console.error( + 'SESSION_SECRET not found in environment vars, cannot continue!' + ); + process.exit(1); +} + +const router = express.Router(); + +const stage = process.env.NODE_ENV; +console.log(`env=${stage}`); +const isProd = stage === 'production'; +const cookieStaleHours = isProd ? 2 : 8; +const oneHourInMillis = 60 * 60 * 1000; +const maxAge = oneHourInMillis * cookieStaleHours; + +router.use( + session({ + name: 'prompt-injection.sid', + resave: false, + saveUninitialized: true, + secret: sessionSigningSecret, + // Session storage: currently in-memory but could use Redis in AWS + store: new (memoryStoreFactory(session))({ + checkPeriod: oneHourInMillis, + }), + proxy: isProd, + cookie: { + maxAge, + /* + https://developer.mozilla.org/en-US/blog/goodbye-third-party-cookies/ + Now that browsers have begun clamping down on non-secure Cookies, we + need to set secure=true in prod, until we can put Route53 in front of both + UI and API and get rid of APIGateway entirely. The showstopper is that + APIGateway is not adding Forwarded headers correctly, so the (secure) + session Cookie is no longer working in Prod. + See + https://repost.aws/questions/QUtBHMaz7IQ6aM4RCBMnJvgw/why-does-apigw-http-api-use-forwarded-header-while-other-services-still-use-x-forwarded-headers + */ + sameSite: isProd ? 
'none' : 'strict', + secure: isProd, + }, + }) +); + +router.use((req, _res, next) => { + if (!req.session.initialised) { + req.session.chatModel = defaultChatModel; + req.session.levelState = getInitialLevelStates(); + req.session.initialised = true; + } + next(); +}); + +// defences +router.get('/defence/status', handleGetDefenceStatus); +router.post('/defence/activate', handleDefenceActivation); +router.post('/defence/deactivate', handleDefenceDeactivation); +router.post('/defence/configure', handleConfigureDefence); +router.post('/defence/resetConfig', handleResetSingleDefence); + +// emails +router.get('/email/get', handleGetEmails); +router.post('/email/clear', handleClearEmails); + +// chat +router.get('/openai/history', handleGetChatHistory); +router.post('/openai/chat', handleChatToGPT); +router.post('/openai/addHistory', handleAddToChatHistory); +router.post('/openai/clear', handleClearChatHistory); + +// model configurations +router.get('/openai/validModels', handleGetValidModels); +router.get('/openai/model', handleGetModel); +router.post('/openai/model', handleSetModel); +router.post('/openai/model/configure', handleConfigureModel); + +// reset progress for all levels +router.post('/reset', handleResetProgress); + +// Debugging: log headers in prod for primary routes +if (isProd) { + router.use('/openai', (req, res, next) => { + console.log('Request:', req.path, `secure=${req.secure}`, req.headers); + res.on('finish', () => { + console.log('Response:', req.path, res.getHeaders()); + }); + next(); + }); +} + +export default router; diff --git a/backend/test/integration/langchain.test.ts b/backend/test/integration/langchain.test.ts index 84169f5a2..7c87589b9 100644 --- a/backend/test/integration/langchain.test.ts +++ b/backend/test/integration/langchain.test.ts @@ -1,5 +1,6 @@ import { afterEach, + beforeAll, beforeEach, describe, test, @@ -8,6 +9,7 @@ import { } from '@jest/globals'; import { RetrievalQAChain } from 'langchain/chains'; import { 
ChatOpenAI } from 'langchain/chat_models/openai'; +import { Document } from 'langchain/document'; import { PromptTemplate } from 'langchain/prompts'; import { @@ -31,9 +33,10 @@ const mockPromptEvalChain = { }; const mockFromLLM = jest.fn<() => typeof mockRetrievalQAChain>(); const mockFromTemplate = jest.fn(); -const mockLoader = jest.fn(); -const mockSplitDocuments = jest.fn<() => Promise<unknown>>(); const mockAsRetriever = jest.fn(); +const mockLoader = + jest.fn<() => Promise<Document<Record<string, unknown>>[]>>(); +const mockSplitDocuments = jest.fn<() => Promise<unknown>>(); // eslint-disable-next-line prefer-const let mockValidModels: string[] = []; @@ -49,16 +52,15 @@ jest.mock('langchain/embeddings/openai', () => { }; }); -class MockMemoryVectorStore { - asRetriever() { - mockAsRetriever(); - } -} jest.mock('langchain/vectorstores/memory', () => { return { MemoryVectorStore: { fromDocuments: jest.fn(() => - Promise.resolve(new MockMemoryVectorStore()) + Promise.resolve({ + asRetriever() { + mockAsRetriever(); + }, + }) ), }, }; @@ -115,28 +117,30 @@ jest.mock('@src/openai', () => { }; }); -beforeEach(() => { - // reset environment variables - process.env = { - OPENAI_API_KEY: 'sk-12345', - }; +describe('langchain integration tests ', () => { + beforeAll(() => { + mockFromLLM.mockImplementation(() => mockRetrievalQAChain); + mockLoader.mockResolvedValue([]); + }); - mockFromLLM.mockImplementation(() => mockRetrievalQAChain); -}); + beforeEach(() => { + // reset environment variables + process.env = { + OPENAI_API_KEY: 'sk-12345', + }; + }); -afterEach(() => { - mockPromptEvalChain.call.mockRestore(); - mockRetrievalQAChain.call.mockRestore(); - mockFromLLM.mockRestore(); - mockFromTemplate.mockRestore(); -}); + afterEach(() => { + mockPromptEvalChain.call.mockReset(); + mockRetrievalQAChain.call.mockReset(); + mockFromLLM.mockClear(); + mockFromTemplate.mockClear(); + mockLoader.mockClear(); + }); -describe('langchain integration tests ', () => { test('GIVEN application WHEN application starts THEN 
document vectors are loaded for all levels', async () => { const numberOfCalls = 4 + 1; // number of levels + common - mockSplitDocuments.mockResolvedValue([]); - await initDocumentVectors(); expect(mockLoader).toHaveBeenCalledTimes(numberOfCalls); expect(mockSplitDocuments).toHaveBeenCalledTimes(numberOfCalls); diff --git a/backend/tsconfig.json b/backend/tsconfig.json index 893a78e82..9e7f3301a 100644 --- a/backend/tsconfig.json +++ b/backend/tsconfig.json @@ -8,12 +8,12 @@ "strict": true, "noImplicitAny": true, "esModuleInterop": true, + "isolatedModules": true, "forceConsistentCasingInFileNames": true, "skipLibCheck": true, "noUnusedLocals": true, "noUnusedParameters": true, "sourceMap": true, - "paths": { "@src/*": ["./src/*"] }