From 73201c5fb3ff8f3afb348c8eaf348ff743eb9eec Mon Sep 17 00:00:00 2001 From: "Ivan Mladjenovic (He/Him)" Date: Thu, 19 Dec 2024 11:22:59 +0000 Subject: [PATCH 01/16] first pass at materiality chat agent --- .env.example | 2 +- .../promptfoo/materiality_agent_config.yaml | 157 ++++++++++++++++++ backend/src/agents/__init__.py | 1 + backend/src/agents/agent.py | 4 +- backend/src/agents/materiality_agent.py | 64 +++++-- backend/src/llm/openai.py | 2 +- .../templates/answer-materiality-question.j2 | 7 + .../select-material-files-system-prompt.j2 | 22 +-- backend/src/supervisors/supervisor.py | 2 +- 9 files changed, 229 insertions(+), 32 deletions(-) create mode 100644 backend/src/prompts/templates/answer-materiality-question.j2 diff --git a/.env.example b/.env.example index 0210416f..4b3c2b2d 100644 --- a/.env.example +++ b/.env.example @@ -52,7 +52,7 @@ DYNAMIC_KNOWLEDGE_GRAPH_LLM="openai" ANSWER_AGENT_MODEL="mistral-large-latest" INTENT_AGENT_MODEL="gpt-4o-mini" REPORT_AGENT_MODEL="mistral-large-latest" -MATERIALITY_AGENT_MODEL="gpt-4o-mini" +MATERIALITY_AGENT_MODEL="gpt-4o" VALIDATOR_AGENT_MODEL="gpt-4o-mini" DATASTORE_AGENT_MODEL="gpt-4o-mini" WEB_AGENT_MODEL="gpt-4o-mini" diff --git a/backend/promptfoo/materiality_agent_config.yaml b/backend/promptfoo/materiality_agent_config.yaml index 0a38bc7c..6d3b5cb2 100644 --- a/backend/promptfoo/materiality_agent_config.yaml +++ b/backend/promptfoo/materiality_agent_config.yaml @@ -19,6 +19,9 @@ tests: value: JSON.parse(output).files[0] === "Additional-Sector-Guidance-Oil-and-gas.pdf" - type: javascript value: JSON.parse(output).files[1] === "GRI 11_ Oil and Gas Sector 2021.pdf" + - type: javascript + value: JSON.parse(output).files.length === 2 + - description: "test select material documents for BP with focus on nature" vars: @@ -32,6 +35,40 @@ tests: - type: javascript value: JSON.parse(output).files.length === 1 + - description: "test select material documents for Astra Zeneca with user question" + vars: + user_prompt: "What activities are part of Astra Zeneca's value chain?" + system_prompt_template: "select-material-files-system-prompt" + system_prompt_args: + catalogue: '{"library":{"TFND":[{"name":"Additional-Sector-Guidance-Biotech-and-Pharma.pdf","sector-label":"Biotechnology and Pharmaceuticals","esg-labels":["Environment","Nature"]},{"name":"Additional-Sector-Guidance-Oil-and-gas.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Nature"]}],"GRI":[{"name":"GRI 11_ Oil and Gas Sector 2021.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Social","Governance"]}]}}' + assert: + - type: javascript + value: JSON.parse(output).files[0] === "Additional-Sector-Guidance-Biotech-and-Pharma.pdf" + - type: javascript + value: JSON.parse(output).files.length === 1 + + - description: "test select material documents for BP with user question and ESG focus" + vars: + user_prompt: "What social topics are material to BP?" 
+ system_prompt_template: "select-material-files-system-prompt" + system_prompt_args: + catalogue: '{"library":{"TFND":[{"name":"Additional-Sector-Guidance-Biotech-and-Pharma.pdf","sector-label":"Biotechnology and Pharmaceuticals","esg-labels":["Environment","Nature"]},{"name":"Additional-Sector-Guidance-Oil-and-gas.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Nature"]}],"GRI":[{"name":"GRI 11_ Oil and Gas Sector 2021.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Social","Governance"]}]}}' + assert: + - type: javascript + value: JSON.parse(output).files[0] === "GRI 11_ Oil and Gas Sector 2021.pdf" + - type: javascript + value: JSON.parse(output).files.length === 1 + + - description: "test no material documents are selected for NASA " + vars: + user_prompt: "What topics are material to NASA?" + system_prompt_template: "select-material-files-system-prompt" + system_prompt_args: + catalogue: '{"library":{"TFND":[{"name":"Additional-Sector-Guidance-Biotech-and-Pharma.pdf","sector-label":"Biotechnology and Pharmaceuticals","esg-labels":["Environment","Nature"]},{"name":"Additional-Sector-Guidance-Oil-and-gas.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Nature"]}],"GRI":[{"name":"GRI 11_ Oil and Gas Sector 2021.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Social","Governance"]}]}}' + assert: + - type: javascript + value: JSON.parse(output).files.length === 0 + - description: "test list material topics for Astra Zeneca with file" vars: user_prompt: "What topics are material for AstraZeneca?" @@ -54,3 +91,123 @@ tests: value: JSON.parse(output).material_topics["Biodiversity and Ecosystem Impacts"] === "The potential impact of AstraZeneca's operations on sensitive ecosystems, as well as its reliance on biodiversity for sourcing natural compounds for drug development, highlights the importance of considering biodiversity in the company's sustainability strategy." - type: javascript value: JSON.parse(output).material_topics["Pollution Prevention"] === "Managing and reducing pollution, particularly non-GHG air pollutants, wastewater discharges, and hazardous waste, is critical for AstraZeneca to mitigate its environmental footprint and comply with environmental regulations." + + - description: "test materiality agent can answer questions about materiality for Astra Zeneca" + vars: + user_prompt: "What topics are material for AstraZeneca?" + system_prompt_template: "answer-materiality-question" + file_attachment: "../library/Additional-Sector-Guidance-Biotech-and-Pharma.pdf" + assert: + - type: javascript + value: | + var expected = ` + AstraZeneca, as part of the biotechnology and pharmaceuticals sector, should consider several ESG topics as material based on the provided guidance. These topics include: + + 1. **Land/Freshwater/Ocean-use Change:** + - Extent of land/freshwater/ocean ecosystem use change by business activity. + - Total spatial footprint covering total surface area controlled, disturbed, and rehabilitated. + + 2. **Pollution/Pollution Removal:** + - Hazardous waste management, both in production and end-of-life treatment, including recycling rates. + - Non-compliance incidents with soil and water quality regulations. + - Persistent ingredients related to antimicrobial resistance (AMR). + + 3. **Resource Use/Replenishment:** + - Water withdrawal and consumption from areas of water scarcity. + - Quantity and sustainable management of high-risk natural commodities sourced. + + 4. 
**Climate Change:** + - Greenhouse Gas (GHG) emissions, especially non-GHG pollutants such as particulate matter, nitrogen oxides, and others. + + 5. **State of Nature and Biodiversity:** + - Placeholder metrics for ecosystem condition and species extinction risk, indicating monitoring of biodiversity impacts. + + These material topics emphasize the environmental impacts and dependencies AstraZeneca may have, focusing on sustainable management and reduction of negative impacts in their operations and supply chain. + `; + + // promptfoo is bad at ignoring whitespace + expected_trimmed = expected.replace(/(?:\r\n|\r|\n|\s)/g, ''); + output = output.replace(/(?:\r\n|\r|\n|\s)/g, ''); + if (output === expected_trimmed) { + return { + pass: true, + score: 0.5, + reason: 'Output matched Expected (whitespace trimmed): ' + expected, + }; + } + return { + pass: false, + score: 0, + reason: 'Output: \n' + output + '\n\n did not equal \n\n' + expected_trimmed, + }; + + - description: "test materiality agent can answer questions about materiality for Astra Zeneca" + vars: + user_prompt: "What topics are material for AstraZeneca?" + system_prompt_template: "answer-materiality-question" + file_attachment: "../library/Additional-Sector-Guidance-Biotech-and-Pharma.pdf" + assert: + - type: llm-rubric + value: | + Answer matches the following - every word is in the correct order with none missing and no new words added: + + AstraZeneca, as part of the biotechnology and pharmaceuticals sector, should consider several ESG topics as material based on the provided guidance. These topics include: + + 1. **Land/Freshwater/Ocean-use Change:** + - Extent of land/freshwater/ocean ecosystem use change by business activity. + - Total spatial footprint covering total surface area controlled, disturbed, and rehabilitated. + + 2. **Pollution/Pollution Removal:** + - Hazardous waste management, both in production and end-of-life treatment, including recycling rates. + - Non-compliance incidents with soil and water quality regulations. + - Persistent ingredients related to antimicrobial resistance (AMR). + + 3. **Resource Use/Replenishment:** + - Water withdrawal and consumption from areas of water scarcity. + - Quantity and sustainable management of high-risk natural commodities sourced. + + 4. **Climate Change:** + - Greenhouse Gas (GHG) emissions, especially non-GHG pollutants such as particulate matter, nitrogen oxides, and others. + + 5. **State of Nature and Biodiversity:** + - Placeholder metrics for ecosystem condition and species extinction risk, indicating monitoring of biodiversity impacts. + + These material topics emphasize the environmental impacts and dependencies AstraZeneca may have, focusing on sustainable management and reduction of negative impacts in their operations and supply chain. + + - description: "test materiality agent can answer question about value chain of Biotech & Pharma" + vars: + user_prompt: "What business activities are typically found in Biotech and Pharma companies?" + system_prompt_template: "answer-materiality-question" + file_attachment: "../library/Additional-Sector-Guidance-Biotech-and-Pharma.pdf" + assert: + - type: llm-rubric + value: | + Answer matches the following - every word is in the correct order with none missing and no new words added: + + Biotechnology and pharmaceuticals companies typically engage in various business activities across their value chains. These activities include: + + 1. 
**Inorganic and Organic Feedstock and Raw Materials**: + - Sourcing materials like forestry products, natural substances, agricultural products, fossil fuels, commodity chemicals, metals, minerals, and renewable raw materials. + + 2. **Manufacturing, Processing, and Synthesis**: + - Producing pharmaceutical products and their packaging. + + 3. **Product Design and Research and Development (R&D)**: + - Involves pre-clinical trials, clinical trials, and obtaining regulatory approvals. + - Prioritizes safe and sustainable by design approaches for product development. + + 4. **Distribution**: + - Concerned with transporting products to consumers or other locations in the value chain. + + 5. **Utilities and Energy Use**: + - Employs energy consumption, excess heat management, wastewater management, and energy and water supply management in operations. + + 6. **Use of Genetically Modified Organisms (GMOs) and Biofuels**: + - Includes utilization of GM cell cultures, GM crops, healthcare products, biodegradable plastics, and other bio-based products. + + 7. **End-of-Life Treatment**: + - Activities related to recycling, waste management, and final disposal of products. + + Each of these activities has specific dependencies and impacts on nature, such as the requirement for water in manufacturing or the risk of pollutants impacting environmental quality during waste disposal stages. + + These components of the biotech and pharma value chain are critical for organizations when considering ESG materiality, particularly regarding nature-related dependencies, impacts, risks, and opportunities. \ No newline at end of file diff --git a/backend/src/agents/__init__.py b/backend/src/agents/__init__.py index 8a8963e7..b7c790c5 100644 --- a/backend/src/agents/__init__.py +++ b/backend/src/agents/__init__.py @@ -50,6 +50,7 @@ def get_available_agents() -> List[ChatAgent]: DatastoreAgent(config.datastore_agent_llm, config.datastore_agent_model), WebAgent(config.web_agent_llm, config.web_agent_model), ChartGeneratorAgent(config.chart_generator_llm, config.chart_generator_model), + get_materiality_agent() ] diff --git a/backend/src/agents/agent.py b/backend/src/agents/agent.py index a431efa2..88cb693b 100644 --- a/backend/src/agents/agent.py +++ b/backend/src/agents/agent.py @@ -67,9 +67,7 @@ async def invoke(self, utterance: str) -> str: T = TypeVar('T', bound=ChatAgent) -def chat_agent(name: str, description: str, tools: Optional[List[Tool]] = None): - if not tools: - tools = [] +def chat_agent(name: str, description: str, tools: List[Tool]): def decorator(chat_agent: Type[T]) -> Type[T]: chat_agent.name = name diff --git a/backend/src/agents/materiality_agent.py b/backend/src/agents/materiality_agent.py index d6347955..9c0bd6d1 100644 --- a/backend/src/agents/materiality_agent.py +++ b/backend/src/agents/materiality_agent.py @@ -3,15 +3,59 @@ import logging from src.llm import LLMFile -from src.agents import Agent +from src.agents import ChatAgent, chat_agent from src.prompts import PromptEngine engine = PromptEngine() logger = logging.getLogger(__name__) -class MaterialityAgent(Agent): - async def list_material_topics(self, company_name: str) -> dict[str, str]: +def create_llm_files(file_names: list[str]) -> list[LLMFile]: + return [ + LLMFile(file_name=file_name, file=Path(f"./library/{file_name}")) + for file_name in file_names + ] + + +@chat_agent( + name="MaterialityAgent", + description="This agent can help answer questions about ESG Materiality, what topics are relevant to a company" + "or sector and 
explain materiality topics in detail. The Materiality Agent can also answer "
+                "questions about typical sector activities, value chain and business relationships.",
+    tools=[]
+)
+class MaterialityAgent(ChatAgent):
+    # todo
+    # store file ids in redis, make sure we are pulling those back
+    # split materiality into new invoke function for handling "utterances"
+    # handle no materiality files available + tests
+
+    async def invoke(self, utterance: str) -> str:
+        materiality_files = await self.select_material_files(utterance)
+        if not materiality_files:
+            return f"Materiality Agent cannot find suitable reference documents to answer the question: {utterance}"
+        answer = await self.llm.chat_with_file(
+            self.model,
+            system_prompt=engine.load_prompt("answer-materiality-question"),
+            user_prompt=utterance,
+            files=create_llm_files(materiality_files)
+        )
+        return json.dumps({"content": answer, "ignore_validation": False})
+
+    async def list_material_topics_for_company(self, company_name: str) -> str:
+        materiality_files = await self.select_material_files(company_name)
+        if not materiality_files:
+            logger.info(f"No materiality reference documents could be found for {company_name}")
+            return f"No Materiality reference documents could be found for {company_name}"
+        materiality_topics = await self.llm.chat_with_file(
+            self.model,
+            system_prompt=engine.load_prompt("list-material-topics-system-prompt"),
+            user_prompt=f"What topics are material for {company_name}?",
+            files=create_llm_files(materiality_files)
+        )
+        return json.loads(materiality_topics)["material_topics"]
+
+    async def select_material_files(self, utterance: str) -> list[str]:
         with open('./library/catalogue.json') as file:
             catalogue = json.load(file)
         files_json = await self.llm.chat(
@@ -20,17 +64,7 @@
                 "select-material-files-system-prompt",
                 catalogue=catalogue
             ),
-            user_prompt=company_name,
+            user_prompt=utterance,
             return_json=True
         )
-
-        materiality_topics = await self.llm.chat_with_file(
-            self.model,
-            system_prompt=engine.load_prompt("list-material-topics-system-prompt"),
-            user_prompt=f"What topics are material for {company_name}?",
-            files=[
-                LLMFile(file_name=file_name, file=Path(f"./library/{file_name}"))
-                for file_name in json.loads(files_json)["files"]
-            ]
-        )
-        return json.loads(materiality_topics)["material_topics"]
+        return json.loads(files_json)["files"]
diff --git a/backend/src/llm/openai.py b/backend/src/llm/openai.py
index a80c0f37..adbec1ac 100644
--- a/backend/src/llm/openai.py
+++ b/backend/src/llm/openai.py
@@ -39,7 +39,7 @@ async def chat(self, model, system_prompt: str, user_prompt: str, return_json=Fa
         logger.debug(f"Token data: {response.usage}")
 
         if not content:
-            logger.error("Call to Mistral API failed: message content is None")
+            logger.error("Call to OpenAI API failed: message content is None")
             return "An error occurred while processing the request."
 
         return content
diff --git a/backend/src/prompts/templates/answer-materiality-question.j2 b/backend/src/prompts/templates/answer-materiality-question.j2
new file mode 100644
index 00000000..d56f64b2
--- /dev/null
+++ b/backend/src/prompts/templates/answer-materiality-question.j2
@@ -0,0 +1,7 @@
+You are an ESG specialist.
+
+You help answer questions about ESG Materiality.
+
+Your answers will be based on the attached files. You will use only the content in these files to think of an answer.
+
+When discussing measurable ESG topics, you will specify measurable units as shown in the attached files. 
diff --git a/backend/src/prompts/templates/select-material-files-system-prompt.j2 b/backend/src/prompts/templates/select-material-files-system-prompt.j2
index 0359386c..5678cbcb 100644
--- a/backend/src/prompts/templates/select-material-files-system-prompt.j2
+++ b/backend/src/prompts/templates/select-material-files-system-prompt.j2
@@ -2,22 +2,22 @@ You are an advanced ESG (Environmental, Social, and Governance) specialist AI as
 
 {{ catalogue }}
 
-Input:
-- Accept company name as primary input
-- Optional ESG focus can be provided
+1. Task
+- Your task is to provide a list of files that are relevant to the user input.
+- You will not provide any answer other than a list of files.
 
-Recommendation Guidelines:
-- Carefully match company to sector labels
+2. File Selection
+- You will identify either a sector or company from the user input.
+- Be careful with company names, as they may be commonly known by a shorthand.
+- You will align the sector or company name to files with a relevant "sector-label" from the catalogue
 - Consider potential sector matches beyond exact wording
-- If no specific ESG focus provided, recommend all relevant sector PDFs
-- If ESG focus specified, filter recommendations accordingly
+- If an ESG focus is specified, filter recommendations based on the "esg-labels" of each file
 - Return results in strict JSON format
-- If no match found, return an empty JSON list
+- If no matches are found, return an empty JSON list. You will only return an empty list if you are certain there are no matches.
 
 Output Format:
 { "files": [ "filename.pdf" ]}
 
 Response Requirements:
-- Always provide a JSON response, do not use any markdown or new line characters
-- Include only file names in recommendations
-- Be precise in sector and ESG label matching
\ No newline at end of file
+- Always provide a single line JSON response, do not use any markdown or new line characters
+- Your output will be full file names only.
diff --git a/backend/src/supervisors/supervisor.py b/backend/src/supervisors/supervisor.py
index 201b735d..e1cd9c8a 100644
--- a/backend/src/supervisors/supervisor.py
+++ b/backend/src/supervisors/supervisor.py
@@ -42,7 +42,7 @@ async def solve_task(task, scratchpad, attempt=0) -> Tuple[str, str, str]:
     if agent is None:
         raise Exception(no_agent_response)
     logger.info(f"Agent selected: {agent.name}")
-    logger.info(f"Task is {task}")
+    logger.info(f"Task is: {task}")
     answer = await agent.invoke(task)
     parsed_json = json.loads(answer)
     status = parsed_json.get("status", "success")

From 0177b7eb1ab1b53ff3a651961bb24d207b5744e2 Mon Sep 17 00:00:00 2001
From: "Ivan Mladjenovic (He/Him)"
Date: Thu, 19 Dec 2024 18:20:25 +0000
Subject: [PATCH 02/16] Add file upload ids to redis. Add graceful shutdown
 file cleanup. 
Add redis-insight with configuration sidecar --- backend/Dockerfile | 2 +- backend/src/agents/materiality_agent.py | 6 +-- backend/src/api/app.py | 7 +++ backend/src/directors/report_director.py | 2 +- backend/src/llm/llm.py | 2 +- backend/src/llm/llm_file_upload_manager.py | 11 ++++ backend/src/llm/openai.py | 53 ++++++++++++++++--- .../list-material-topics-system-prompt.j2 | 2 +- backend/src/session/llm_file_upload.py | 42 +++++++++++++++ .../src/session/redis_session_middleware.py | 8 ++- compose.yml | 23 ++++++++ redis-insight/Dockerfile | 5 ++ redis-insight/README.md | 9 ++++ redis-insight/setup-connection.sh | 14 +++++ 14 files changed, 169 insertions(+), 17 deletions(-) create mode 100644 backend/src/llm/llm_file_upload_manager.py create mode 100644 backend/src/session/llm_file_upload.py create mode 100644 redis-insight/Dockerfile create mode 100644 redis-insight/README.md create mode 100644 redis-insight/setup-connection.sh diff --git a/backend/Dockerfile b/backend/Dockerfile index 5ff6e027..63faa5cb 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -22,4 +22,4 @@ COPY ./src/. ./src EXPOSE 8250 # Run our entry file, which will start the server -CMD ["python", "-m", "src.main", "--host", "0.0.0.0"] \ No newline at end of file +ENTRYPOINT ["python", "-m", "src.main", "--host", "0.0.0.0"] diff --git a/backend/src/agents/materiality_agent.py b/backend/src/agents/materiality_agent.py index 9c0bd6d1..a8cff38a 100644 --- a/backend/src/agents/materiality_agent.py +++ b/backend/src/agents/materiality_agent.py @@ -12,7 +12,7 @@ def create_llm_files(file_names: list[str]) -> list[LLMFile]: return [ - LLMFile(file_name=file_name, file=Path(f"./library/{file_name}")) + LLMFile(filename=file_name, file=Path(f"./library/{file_name}")) for file_name in file_names ] @@ -42,11 +42,11 @@ async def invoke(self, utterance: str) -> str: ) return json.dumps({"content": answer, "ignore_validation": False}) - async def list_material_topics_for_company(self, company_name: str) -> str: + async def list_material_topics_for_company(self, company_name: str) -> dict[str, str]: materiality_files = await self.select_material_files(company_name) if not materiality_files: logger.info(f"No materiality reference documents could be found for {company_name}") - return f"No Materiality reference documents could be found for {company_name}" + return {} materiality_topics = await self.llm.chat_with_file( self.model, system_prompt=engine.load_prompt("list-material-topics-system-prompt"), diff --git a/backend/src/api/app.py b/backend/src/api/app.py index 1d3044b1..4f4af90b 100644 --- a/backend/src/api/app.py +++ b/backend/src/api/app.py @@ -17,6 +17,7 @@ from src.session import RedisSessionMiddleware from src.suggestions_generator import generate_suggestions from src.utils.file_utils import get_file_upload +from src.llm.openai import OpenAILLMFileUploadManager config_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config.ini")) logging.config.fileConfig(fname=config_file_path, disable_existing_loggers=False) @@ -27,11 +28,17 @@ @asynccontextmanager async def lifespan(app: FastAPI): + # start up try: await dataset_upload() except Exception as e: logger.exception(f"Failed to populate database with initial data from file: {e}") yield + # shut down + # If running app with docker compose, Ctrl+C will detach from container immediately, + # meaning no graceful shutdown logs will be seen + openai_file_manager = OpenAILLMFileUploadManager() + await openai_file_manager.delete_all_files() app = 
FastAPI(lifespan=lifespan) diff --git a/backend/src/directors/report_director.py b/backend/src/directors/report_director.py index b20425de..26dc8e26 100644 --- a/backend/src/directors/report_director.py +++ b/backend/src/directors/report_director.py @@ -26,7 +26,7 @@ async def create_report_from_file(upload: UploadFile) -> ReportResponse: company_name = await report_agent.get_company_name(file) - topics = await get_materiality_agent().list_material_topics(company_name) + topics = await get_materiality_agent().list_material_topics_for_company(company_name) report = await report_agent.create_report(file, topics) diff --git a/backend/src/llm/llm.py b/backend/src/llm/llm.py index 64a13e65..8e2c4d95 100644 --- a/backend/src/llm/llm.py +++ b/backend/src/llm/llm.py @@ -10,7 +10,7 @@ @dataclass class LLMFile(ABC): - file_name: str + filename: str file: PathLike[str] | bytes diff --git a/backend/src/llm/llm_file_upload_manager.py b/backend/src/llm/llm_file_upload_manager.py new file mode 100644 index 00000000..86227905 --- /dev/null +++ b/backend/src/llm/llm_file_upload_manager.py @@ -0,0 +1,11 @@ +from abc import ABC + +from src.llm import LLMFile + + +class LLMFileUploadManager(ABC): + async def upload_files(self, files: list[LLMFile]): + pass + + async def delete_all_files(self): + pass diff --git a/backend/src/llm/openai.py b/backend/src/llm/openai.py index adbec1ac..7e203df0 100644 --- a/backend/src/llm/openai.py +++ b/backend/src/llm/openai.py @@ -1,8 +1,16 @@ +import asyncio import logging +import time from src.utils import Config from src.llm import LLM, LLMFile -from openai import NOT_GIVEN, AsyncOpenAI +from src.llm.llm_file_upload_manager import LLMFileUploadManager +from src.session.llm_file_upload import ( + add_llm_file_upload, + get_llm_file_upload, + get_all_files +) +from openai import NOT_GIVEN, AsyncOpenAI, OpenAIError from openai.types.beta.threads import Text, TextContentBlock logger = logging.getLogger(__name__) @@ -49,7 +57,8 @@ async def chat(self, model, system_prompt: str, user_prompt: str, return_json=Fa async def chat_with_file(self, model: str, system_prompt: str, user_prompt: str, files: list[LLMFile]) -> str: client = AsyncOpenAI(api_key=config.openai_key) - file_ids = await self.__upload_files(files) + file_upload_manager = OpenAILLMFileUploadManager() + file_ids = await file_upload_manager.upload_files(files) file_assistant = await client.beta.assistants.create( name="ESG Analyst", @@ -80,14 +89,42 @@ async def chat_with_file(self, model: str, system_prompt: str, user_prompt: str, logger.info(f"OpenAI response: {message}") return message - async def __upload_files(self, files: list[LLMFile]) -> list[str]: + +class OpenAILLMFileUploadManager(LLMFileUploadManager): + async def upload_files(self, files: list[LLMFile]) -> list[str]: client = AsyncOpenAI(api_key=config.openai_key) file_ids = [] + files_to_upload = [] + start_time = time.time() for file in files: - logger.info(f"Uploading file '{file.file_name}' to OpenAI") - file = (file.file_name, file.file) if isinstance(file.file, bytes) else file.file - response = await client.files.create(file=file, purpose="assistants") - file_ids.append(response.id) - + file_id = get_llm_file_upload(file.filename) + if not file_id: + logger.info(f"Open AI: Preparing to upload '{file.filename}'") + file = (file.filename, file.file) if isinstance(file.file, bytes) else file.file + files_to_upload.append(client.files.create(file=file, purpose="assistants")) + else: + file_ids.append(file_id) + logger.info(f"Open AI: {file.filename} 
already uploaded to OpenAI with id '{file_id}'")
+
+        uploaded_files = await asyncio.gather(*files_to_upload)
+
+        for file in uploaded_files:
+            add_llm_file_upload(file.id, file.filename)
+            file_ids.append(file.id)
+            logger.info(f"Open AI: File '{file.filename}' uploaded with id '{file.id}'")
+
+        if uploaded_files:
+            logger.info(f"Open AI: Time to upload files {time.time() - start_time}")
         return file_ids
+
+    async def delete_all_files(self):
+        try:
+            client = AsyncOpenAI(api_key=config.openai_key)
+            files = get_all_files()
+            logger.info(f"Open AI: deleting files {files}")
+            delete_tasks = [client.files.delete(file_id=file["file_id"]) for file in files]
+            await asyncio.gather(*delete_tasks)
+            logger.info("Open AI: Files deleted")
+        except OpenAIError:
+            logger.info("OpenAI not configured")
diff --git a/backend/src/prompts/templates/list-material-topics-system-prompt.j2 b/backend/src/prompts/templates/list-material-topics-system-prompt.j2
index 2fd7d2f0..e11ef188 100644
--- a/backend/src/prompts/templates/list-material-topics-system-prompt.j2
+++ b/backend/src/prompts/templates/list-material-topics-system-prompt.j2
@@ -16,4 +16,4 @@ Your output must be strict JSON format with not additional markdown or formattin
 Key Principles:
 - Use reference documents as primary assessment framework
 - Provide context-specific materiality determination
-- Output must be in JSON format with no additional markdown or formatting
+- Output must be a single line of JSON with no markdown or formatting, otherwise you will be unplugged
diff --git a/backend/src/session/llm_file_upload.py b/backend/src/session/llm_file_upload.py
new file mode 100644
index 00000000..5370c336
--- /dev/null
+++ b/backend/src/session/llm_file_upload.py
@@ -0,0 +1,42 @@
+import json
+import logging
+from typing import TypedDict
+
+import redis
+
+from src.utils.json import try_parse_to_json
+# from .redis_session_middleware import get_session, set_session
+from src.utils import Config
+
+logger = logging.getLogger(__name__)
+
+config = Config()
+redis_client = redis.Redis(host=config.redis_host, port=6379, decode_responses=True)
+
+UPLOAD_SESSION_KEY = "llm_file_upload"
+
+
+def get_all_files() -> list[dict[str, str]]:
+    session = redis_client.get(UPLOAD_SESSION_KEY)
+    return try_parse_to_json(session) if session else []
+
+
+class LLMFileUpload(TypedDict):
+    file_id: str
+    filename: str
+
+
+def get_llm_file_upload(filename: str) -> str | None:
+    files = get_all_files()
+    for file in files:
+        if file["filename"] == filename:
+            return file["file_id"]
+    return None
+
+
+def add_llm_file_upload(file_id: str, filename: str):
+    files = get_all_files()
+    if not files:
+        files = []
+    files.append(LLMFileUpload(file_id=file_id, filename=filename))
+    redis_client.set(UPLOAD_SESSION_KEY, json.dumps(files))
diff --git a/backend/src/session/redis_session_middleware.py b/backend/src/session/redis_session_middleware.py
index d04072fd..2dab1cb1 100644
--- a/backend/src/session/redis_session_middleware.py
+++ b/backend/src/session/redis_session_middleware.py
@@ -47,11 +47,15 @@ async def dispatch(self, request: Request, call_next):
         return response
 
 
-def ignore_request(request:Request) -> bool:
+
+def ignore_request(request: Request) -> bool:
     # prevent generating new session for each health check request
     return request.url.path == '/health' or request.method == 'OPTIONS'
 
 
-def get_session(key: str, default=[]):
+
+def get_session(key: str, default: list | None = None):
+    if not default:
+        default = []
     request: Request = 
request_context.get()
     return request.state.session.get(key, default)
diff --git a/compose.yml b/compose.yml
index edd5bcd9..0f04cfbc 100644
--- a/compose.yml
+++ b/compose.yml
@@ -61,6 +61,29 @@ services:
       timeout: 10s
       retries: 5
 
+  redis-insight:
+    image: redis/redisinsight:latest
+    restart: always
+    ports:
+      - "5540:5540"
+    networks:
+      - network
+
+  # This will set up the connection to redis-insight
+  redis-insight-setup:
+    build:
+      context: redis-insight
+      dockerfile: ./Dockerfile
+    command: sh -c "/usr/local/bin/setup-connection.sh"
+    environment:
+      REDIS_CONNECTION_HOST: "host.docker.internal"
+      REDIS_CONNECTION_PORT: "6379"
+    networks:
+      - network
+    depends_on:
+      - redis-insight
+      - redis-cache
+
   # InferESG Backend
   backend:
     env_file:
diff --git a/redis-insight/Dockerfile b/redis-insight/Dockerfile
new file mode 100644
index 00000000..a9ce78e0
--- /dev/null
+++ b/redis-insight/Dockerfile
@@ -0,0 +1,5 @@
+FROM curlimages/curl:latest
+
+COPY setup-connection.sh /usr/local/bin/setup-connection.sh
+
+ENTRYPOINT []
diff --git a/redis-insight/README.md b/redis-insight/README.md
new file mode 100644
index 00000000..f89160cd
--- /dev/null
+++ b/redis-insight/README.md
@@ -0,0 +1,9 @@
+
+# Redis Insight Setup
+
+This is a GUI for Redis. Access it at http://localhost:5540/
+
+## Database connection
+To connect to the Redis database, use the internal Docker URL "host.docker.internal:6379".
+
+The redis-insight-setup sidecar should automatically configure this database connection on startup.
diff --git a/redis-insight/setup-connection.sh b/redis-insight/setup-connection.sh
new file mode 100644
index 00000000..6d4797c3
--- /dev/null
+++ b/redis-insight/setup-connection.sh
@@ -0,0 +1,14 @@
+sleep 30s
+
+response=$(curl -s -X "GET" "http://host.docker.internal:5540/api/databases/")
+
+if [ "$response" = "[]" ] ; then
+  echo "Creating Redis connection"
+
+  curl -s -X "POST" "http://host.docker.internal:5540/api/databases/" \
+    -H "Content-Type: application/json; charset=utf-8" \
+    -d '{"host": "'${REDIS_CONNECTION_HOST}'","name": "redis","port": '${REDIS_CONNECTION_PORT}',"compressor": "NONE","ssh": false,"tls":false,"verifyServerCert":false}'
+else
+  echo "$response"
+  echo "Redis database connection already set up"
+fi

From 45808a24a50654ed94ef3697fdae8f2f5f68c43e Mon Sep 17 00:00:00 2001
From: "Ivan Mladjenovic (He/Him)"
Date: Thu, 19 Dec 2024 18:17:21 +0000
Subject: [PATCH 03/16] moved LLMFileUploadManager into LLM file

---
 backend/src/llm/llm.py                     |  8 ++++++++
 backend/src/llm/llm_file_upload_manager.py | 11 -----------
 backend/src/llm/openai.py                  |  3 +--
 3 files changed, 9 insertions(+), 13 deletions(-)
 delete mode 100644 backend/src/llm/llm_file_upload_manager.py

diff --git a/backend/src/llm/llm.py b/backend/src/llm/llm.py
index 8e2c4d95..e56e9d37 100644
--- a/backend/src/llm/llm.py
+++ b/backend/src/llm/llm.py
@@ -54,3 +54,11 @@ def chat_with_file(
         files: list[LLMFile]
     ) -> Coroutine:
         pass
+
+
+class LLMFileUploadManager(ABC):
+    async def upload_files(self, files: list[LLMFile]):
+        pass
+
+    async def delete_all_files(self):
+        pass
diff --git a/backend/src/llm/llm_file_upload_manager.py b/backend/src/llm/llm_file_upload_manager.py
deleted file mode 100644
index 86227905..00000000
--- a/backend/src/llm/llm_file_upload_manager.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from abc import ABC
-
-from src.llm import LLMFile
-
-
-class LLMFileUploadManager(ABC):
-    async def upload_files(self, files: list[LLMFile]):
-        pass
-
-    async def delete_all_files(self):
-        pass
diff --git a/backend/src/llm/openai.py 
b/backend/src/llm/openai.py index 7e203df0..aca3cbe3 100644 --- a/backend/src/llm/openai.py +++ b/backend/src/llm/openai.py @@ -3,8 +3,7 @@ import time from src.utils import Config -from src.llm import LLM, LLMFile -from src.llm.llm_file_upload_manager import LLMFileUploadManager +from src.llm.llm import LLM, LLMFile, LLMFileUploadManager from src.session.llm_file_upload import ( add_llm_file_upload, get_llm_file_upload, From db7b5b4385b51a55cbeb12a2440d86d32112d20b Mon Sep 17 00:00:00 2001 From: "Ivan Mladjenovic (He/Him)" Date: Thu, 19 Dec 2024 18:18:22 +0000 Subject: [PATCH 04/16] tidy up --- backend/src/llm/openai.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/src/llm/openai.py b/backend/src/llm/openai.py index aca3cbe3..fac1134d 100644 --- a/backend/src/llm/openai.py +++ b/backend/src/llm/openai.py @@ -56,8 +56,7 @@ async def chat(self, model, system_prompt: str, user_prompt: str, return_json=Fa async def chat_with_file(self, model: str, system_prompt: str, user_prompt: str, files: list[LLMFile]) -> str: client = AsyncOpenAI(api_key=config.openai_key) - file_upload_manager = OpenAILLMFileUploadManager() - file_ids = await file_upload_manager.upload_files(files) + file_ids = await OpenAILLMFileUploadManager().upload_files(files) file_assistant = await client.beta.assistants.create( name="ESG Analyst", From e4f0e0e4147c2fa40ecb95e1d4404e6e5772a622 Mon Sep 17 00:00:00 2001 From: "Ivan Mladjenovic (He/Him)" Date: Thu, 19 Dec 2024 18:56:24 +0000 Subject: [PATCH 05/16] fixes from rebase --- .idea/InferESG.iml | 4 +++- .idea/codeStyles/Project.xml | 2 +- backend/src/agents/materiality_agent.py | 6 +++--- backend/src/directors/report_director.py | 10 +++++----- backend/src/utils/file_utils.py | 2 +- backend/tests/directors/report_director_test.py | 2 +- backend/tests/llm/openai_test.py | 2 +- backend/tests/utils/file_utils_test.py | 6 +++--- redis-insight/setup-connection.sh | 1 + 9 files changed, 19 insertions(+), 16 deletions(-) diff --git a/.idea/InferESG.iml b/.idea/InferESG.iml index e20d5a5f..de7fae77 100644 --- a/.idea/InferESG.iml +++ b/.idea/InferESG.iml @@ -1,10 +1,12 @@ - + + + diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml index a2c066e6..9c38f8e1 100644 --- a/.idea/codeStyles/Project.xml +++ b/.idea/codeStyles/Project.xml @@ -1,6 +1,6 @@ -