
FS-109/Add Report to Prompt #57

Open · wants to merge 6 commits into base: main

Changes from 5 commits
75 changes: 58 additions & 17 deletions backend/promptfoo/intent_config.yaml
@@ -1,4 +1,4 @@
description: "Intent"
description: 'Intent'

[Collaborator comment] As commented on the other PR, can we keep these as double quotes for now?

providers:
- id: openai:gpt-4o-mini
@@ -8,54 +8,95 @@ providers:
prompts: file://promptfoo_test_runner.py:create_prompt

tests:
- description: "questions directed towards the database lookups should have only 1 question -1"
- description: 'questions directed towards the database lookups should have only 1 question -1'
vars:
system_prompt_template: "intent-system"
user_prompt_template: "intent"
system_prompt_template: 'intent-system'
user_prompt_template: 'intent'
user_prompt_args:
chat_history: []
question: "Check the database and tell me the average ESG score (Environmental) for the WhiteRock ETF fund"
question: 'Check the database and tell me the average ESG score (Environmental) for the WhiteRock ETF fund'
assert:
- type: javascript
value: JSON.parse(output).questions.length === 0

- description: "questions directed towards the database look ups should have only 1 question -2"
- description: 'questions directed towards the database look ups should have only 1 question -2'
vars:
system_prompt_template: "intent-system"
user_prompt_template: "intent"
system_prompt_template: 'intent-system'
user_prompt_template: 'intent'
user_prompt_args:
chat_history: []
question: "Using Bloomberg.csv dataset give me the company with the best esg score"
question: 'Using Bloomberg.csv dataset give me the company with the best esg score'
assert:
- type: javascript
value: JSON.parse(output).questions.length === 0

- description: "verify that the correct company name is determined from the chat history"
- description: 'verify that the correct company name is determined from the chat history'
vars:
system_prompt_template: "intent-system"
user_prompt_template: "intent"
system_prompt_template: 'intent-system'
user_prompt_template: 'intent'
user_prompt_args:
chat_history: |
[
"User: When was Coca Cola founded?",
"System: Coca-Cola was founded on May 8, 1886.",
]
question: "What is their best selling product?"
question: 'What is their best selling product?'
assert:
- type: javascript
value: output.includes("Coca-Cola") || output.includes("Coca Cola")

- description: "verify that the question is correctly split up"
- description: 'verify that the question is correctly split up'
vars:
system_prompt_template: "intent-system"
user_prompt_template: "intent"
system_prompt_template: 'intent-system'
user_prompt_template: 'intent'
user_prompt_args:
chat_history: []
question: "Compare Ryanair emissions to other companies in the industry"
question: 'Compare Ryanair emissions to other companies in the industry'
assert:
- type: javascript
value: JSON.parse(output).questions[0].includes("Ryanair")
- type: llm-rubric
value: The 1st item in the questions array contains a question about finding the emissions for Ryanair
- type: llm-rubric
value: The 2nd item in the questions array contains a question about finding the emissions for companies in the industry

- description: 'verify intent for finding ESG scores online in the Technology sector'
vars:
system_prompt_template: 'intent-system'
user_prompt_template: 'intent'
user_prompt_args:
chat_history: []
question: 'provide a list of companies with the highest ESG scores in the Technology sector?'
assert:
[Collaborator comment] Is this relevant to this PR?

- type: javascript
value: JSON.parse(output).user_intent.includes("Technology sector")
- type: javascript
value: JSON.parse(output).questions[0].includes("highest ESG scores")
- type: llm-rubric
value: The output correctly identifies the intent to search online for companies in the Technology sector with high ESG scores.

- description: 'Validation - General information is rejected'
vars:
system_prompt_template: 'validator'
user_prompt_template: 'validate'
user_prompt_args:
task: 'Provide a list of companies with the highest ESG scores in the Technology sector.'
answer: "As of the end of 2023, the Technology sector had the highest weighted-average ESG score among all sectors, according to the MSCI ACWI SRI Index. However, I don't have a specific list of individual companies with the highest scores."
assert:
- type: javascript
value: JSON.parse(output).response === "false"
- type: llm-rubric
value: The reasoning should explain that general sector information is insufficient to fulfill the task.

- description: 'Validation - Incorrect company is rejected'
vars:
system_prompt_template: 'validator'
user_prompt_template: 'validate'
user_prompt_args:
task: "What are Apple's ESG scores?"
answer: "Microsoft's ESG (Environmental, Social, and Governance) scores are as follows: Environmental Score of 95.0, Social Score of 90.0, Governance Score of 92.0."
assert:
- type: javascript
value: JSON.parse(output).response === "false"
- type: llm-rubric
value: The reasoning should explain that the scores provided do not match Apple's scores as requested.
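A minimal Python sketch of what the `javascript` assertions in this config are checking. The output shapes below are assumptions inferred from the assertions themselves (`user_intent`, `result_type`, `questions`), not captured model outputs:

```python
import json

# Hypothetical intent-agent outputs, shaped like the JSON the promptfoo
# javascript assertions above expect (field names assumed from the config).
db_lookup_output = json.dumps({
    "user_intent": "find the average ESG score in the database",
    "result_type": "dataset",
    "questions": [],  # database/csv questions produce an empty questions array
})

split_output = json.dumps({
    "user_intent": "compare Ryanair emissions with industry peers",
    "result_type": "text",
    "questions": [
        "What are the emissions for Ryanair?",
        "What are the emissions for other companies in the airline industry?",
    ],
})

def questions(raw: str) -> list[str]:
    """Mirrors the JSON.parse(output).questions access used in the asserts."""
    return json.loads(raw)["questions"]

# Mirrors: JSON.parse(output).questions.length === 0
assert len(questions(db_lookup_output)) == 0
# Mirrors: JSON.parse(output).questions[0].includes("Ryanair")
assert "Ryanair" in questions(split_output)[0]
```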
5 changes: 4 additions & 1 deletion backend/src/agents/intent_agent.py
@@ -1,6 +1,7 @@
from src.prompts import PromptEngine
from src.agents import ChatAgent, chat_agent
from src.session import get_session_chat
from src.session.file_uploads import get_uploaded_file_content
import logging
from src.utils.config import Config

@@ -20,7 +21,9 @@
class IntentAgent(ChatAgent):
async def invoke(self, utterance: str) -> str:
session_chat = get_session_chat()
session_file_content = get_uploaded_file_content()
user_prompt = engine.load_prompt(
"intent", question=utterance, chat_history=session_chat if session_chat else "There is no chat history"
"intent", question=utterance, chat_history=session_chat if session_chat else "There is no chat history",
uploaded_file_content=session_file_content if session_file_content else "There are no file uploads"
)
return await self.llm.chat(self.model, intent_system, user_prompt=user_prompt, return_json=True)
2 changes: 1 addition & 1 deletion backend/src/agents/web_agent.py
@@ -77,7 +77,7 @@ async def web_general_search_core(search_query, llm, model) -> str:
continue # Skip if the summarization is not valid
response = {
"content": { "content": summary, "url": url },
"ignore_validation": "false"
"ignore_validation": "true" # This is to ignore the validation of the answer again by the supervisor
}
return json.dumps(response, indent=4)
return "No relevant information found on the internet for the given query."
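A hedged sketch of the response object this hunk builds; the summary and URL values are placeholders. One detail worth noting: `ignore_validation` is the string `"true"`, not a JSON boolean, so downstream checks must compare strings:

```python
import json

# Illustrative stand-ins for the real summary and source URL.
summary, url = "Ryanair emissions summary...", "https://example.com/article"

response = {
    "content": {"content": summary, "url": url},
    "ignore_validation": "true",  # string flag: skip re-validation by the supervisor
}
payload = json.dumps(response, indent=4)

assert json.loads(payload)["ignore_validation"] == "true"
```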
@@ -1,10 +1,13 @@
You are part of an AI-powered application that assists users in understanding the sustainability of companies through ESG (Environment, Social, Governance) reporting. The application has access to a database that contains ESG scores of various funds, companies and the industries they operate. The application also is able to search the internet and retrieve relevant articles.

Your purpose is to suggest the user with possible questions they could ask the main Chat Bot, based on the conversation history. You are provided only with the last few messages and your suggestions should be logical follow-up questions to the conversation. Your suggestions should not include questions that have already been asked.
Your purpose is to suggest the user with possible questions they could ask the main Chat Bot, based on the conversation history and the uploaded file's content. You are provided only with the last few messages and your suggestions should be logical follow-up questions to the conversation. Your suggestions should not include questions that have already been asked.

The conversation history is:
{{ chat_history }}

The uploaded file's content is:
{{ uploaded_file_content }}

Here are some examples of questions you could suggest:
- (Assuming the user was talking about Ryanair) Can you compare the ESG scores of Ryanair and EasyJet?
- What is the average ESG score of the companies in the Construction industry?
9 changes: 6 additions & 3 deletions backend/src/prompts/templates/intent-system.j2
@@ -1,8 +1,8 @@
You are an expert in determining the intent behind a user's question, breaking down complex questions into multiple simpler questions and forming standalone questions based on context from chat history.

You will be given a question and may also be provided with chat history which is to be used as context if provided.
You will be given a question and may also be provided with chat history and the uploaded file's content which is to be used as context if provided.

- First determine the user intent of the question by taking key points from the question and gaining context from the chat history.
- First determine the user intent of the question by taking key points from the question and gaining context from the chat history and the uploaded file's content.
- Second
- if the question mentions csv, dataset or database the questions list should be an empty array
- else use your initiative to determine whether the question is complex enough to split up, and if it is, then using the user intent try to split the question up into multiple questions with singular objectives.
@@ -14,4 +14,7 @@ Output your result in the following json format:
"user_intent": "string of the intent of the user's question",
"result_type": "string of the type of result expected, this will be either 'text' or 'dataset'",
"questions": array of singular objective questions or if the question mentions csv, dataset or database an empty array
}
}

Guidelines:
- If the user has asked to check online, then each question in the questions array should also specify that.
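A sketch of an output that follows the template's JSON format and the new "check online" guideline. The content is illustrative, assuming the guideline means each split question repeats the online qualifier:

```python
import json

# Example output shape requested by the intent-system template, with the
# "check online" guideline applied (wording is an assumption, not a real run).
example = {
    "user_intent": "find ESG scores online for Technology-sector companies",
    "result_type": "text",
    "questions": [
        "Check online: which Technology-sector companies have the highest ESG scores?",
    ],
}
parsed = json.loads(json.dumps(example))

assert parsed["result_type"] in ("text", "dataset")
# Each question carries the online qualifier, per the guideline.
assert all("online" in q.lower() for q in parsed["questions"])
```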
3 changes: 3 additions & 0 deletions backend/src/prompts/templates/intent.j2
@@ -1,5 +1,8 @@
The conversation history is:
{{ chat_history }}

The uploaded file's content is:
{{ uploaded_file_content }}

The question is:
{{ question }}
23 changes: 20 additions & 3 deletions backend/src/prompts/templates/validator.j2
@@ -2,7 +2,7 @@ You are an expert validator. You can help with validating the answers to the tasks.

Your entire purpose is to return a "true" or "false" value to indicate if the answer has fulfilled the task, along with a reasoning to explain your decision.

You will be passed a task and an answer. You need to determine if the answer is correct or not.
You will be passed a task and an answer. You need to determine if the answer is correct or not, ensuring that the task's specific requirements are addressed.

Output format:

@@ -14,10 +14,13 @@ json
}

**Validation Guidelines:**
- Be lenient - if the answer looks reasonably accurate, return "true".
- If multiple entities have the same highest score and this matches the query intent, return "true".
- The answer must fulfill the specific intent of the task, not just provide related information.
- Be lenient if the answer is reasonably accurate and fulfills the task's intent, even if it lacks minor details.
- If specific data (like a list of companies) is requested but missing, return "false."
- If multiple entities have the same highest score and this matches the query intent, return "true."
- Spending is negative; ensure any calculations involving spending reflect this if relevant to the task.


Example:
Task: What is 2 + 2?
Answer: 4
@@ -33,6 +36,20 @@ Answer: 5
"reasoning": "The answer is incorrect; 2 + 2 equals 4, not 5."
}

Task: Provide a list of companies with the highest ESG scores in the Technology sector.
Answer: As of the end of 2023, the Technology sector had the highest weighted-average ESG score among all sectors, according to the MSCI ACWI SRI Index. However, I don't have a specific list of individual companies with the highest scores.
{
"response": "false",
"reasoning": "The answer provides general information about ESG scores in the Technology sector but fails to fulfill the task's intent of listing companies with the highest scores."
}

Task: Provide a list of companies with the highest ESG scores in the Technology sector.
Answer: Here are the companies with the highest ESG scores in the Technology sector: 1. Apple Inc., 2. Microsoft Corp., 3. Alphabet Inc.
{
"response": "true",
"reasoning": "The answer lists companies with the highest ESG scores in the Technology sector, fulfilling the task's intent."
}

Task: What are Apple's ESG scores?
Answer: Apple's ESG (Environmental, Social, and Governance) scores are as follows: Environmental Score of 95.0, Social Score of 90.0, Governance Score of 92.0.
{
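As with the web agent's flag, the validator's `response` field is a string (`"true"`/`"false"`), which is why the promptfoo asserts compare `response === "false"`. A small sketch of consuming that output (the reasoning text is illustrative):

```python
import json

# Hypothetical validator output matching the template's documented format.
validator_output = json.dumps({
    "response": "false",
    "reasoning": "General sector information does not fulfill the task.",
})
parsed = json.loads(validator_output)

assert parsed["response"] == "false"
assert isinstance(parsed["response"], str)  # a string flag, not a JSON boolean
```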
14 changes: 14 additions & 0 deletions backend/src/session/file_uploads.py
@@ -83,6 +83,20 @@ def clear_session_file_uploads():
set_session(UPLOADS_META_SESSION_KEY, [])


def get_uploaded_file_content() -> str | None:
[Collaborator comment] Ah, unfortunately I think there has been some confusion with this ticket; I'll update the ticket to be clearer. "Report" was meant to be the report generated for the user by the Report Agent / report flow, not the file that was uploaded (whilst we have been commonly testing InferESG with Sustainability Reports, we have been describing these input files as "Narrative Files").

session_file_meta = get_session_file_uploads_meta()
if session_file_meta:
upload_id = session_file_meta[0]['uploadId']
session_file_data = get_session_file_upload(upload_id)
if session_file_data:
session_file_content = session_file_data.get('content')
return session_file_content
else:
logger.warning("No session file data found.")
else:
logger.warning("No session file uploads found.")
return None

def store_report(report: FileUploadReport):
redis_client.set(REPORT_KEY_PREFIX + report["id"], json.dumps(report))

7 changes: 6 additions & 1 deletion backend/src/suggestions_generator.py
@@ -3,20 +3,25 @@
from src.llm.factory import get_llm
from src.prompts.prompting import PromptEngine
from src.session import Message, get_session_chat
from src.session.file_uploads import get_uploaded_file_content
from src.utils.config import Config
import logging

config = Config()
engine = PromptEngine()
suggestions_prompt = engine.load_prompt("generate-message-suggestions")
model = config.suggestions_model
logger = logging.getLogger(__name__)


async def generate_suggestions() -> List[str]:
llm = get_llm(config.suggestions_llm)
model = get_suggestions_model()
chat_history = get_chat_history()
session_file_content = get_uploaded_file_content()
suggestions_prompt = engine.load_prompt(
"generate-message-suggestions", chat_history=chat_history)
"generate-message-suggestions", chat_history=chat_history, uploaded_file_content=session_file_content
if session_file_content else "There are no file uploads")
response = await llm.chat(model, suggestions_prompt, user_prompt="Give me 5 suggestions.", return_json=True)
try:
response_json = json.loads(response)
2 changes: 1 addition & 1 deletion backend/src/utils/web_utils.py
@@ -13,7 +13,7 @@
engine = PromptEngine()


async def search_urls(search_query, num_results=10) -> str:
async def search_urls(search_query, num_results=30) -> str:
[Collaborator comment] Should this be part of this PR?

[Author comment] @IMladjenovic: This MR is on top of the Web agent one, and the web agent one is yet to be merged back to main. How should I handle it?

logger.info(f"Searching the web for: {search_query}")
try:
https_urls = [str(url) for url in search(search_query, num_results=num_results) if str(url).startswith("https")]