Skip to content

Commit

Permalink
Merge branch 'main' into FS-69/dynamic-knowledge-graph
Browse files Browse the repository at this point in the history
  • Loading branch information
IMladjenovic authored Nov 11, 2024
2 parents 9f27b90 + 1a1d212 commit d114507
Show file tree
Hide file tree
Showing 161 changed files with 611 additions and 78,876 deletions.
25 changes: 14 additions & 11 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ NEO4J_BOLT_PORT=7687
# files location
FILES_DIRECTORY=files

# redis cache configuration
REDIS_HOST="localhost"
REDIS_CACHE_DURATION=3600

# backend LLM properties
MISTRAL_KEY=my-api-key

Expand Down Expand Up @@ -49,15 +53,14 @@ FILE_AGENT_LLM="openai"
SUGGESTIONS_LLM="openai"

# model
ANSWER_AGENT_MODEL="gpt-4o mini"
INTENT_AGENT_MODEL="gpt-4o mini"
ANSWER_AGENT_MODEL="gpt-4o-mini"
INTENT_AGENT_MODEL="gpt-4o-mini"
VALIDATOR_AGENT_MODEL="mistral-large-latest"
DATASTORE_AGENT_MODEL="gpt-4o mini"
MATHS_AGENT_MODEL="gpt-4o mini"
WEB_AGENT_MODEL="gpt-4o mini"
CHART_GENERATOR_MODEL="gpt-4o mini"
ROUTER_MODEL="gpt-4o mini"
FILE_AGENT_MODEL="gpt-4o mini"
SUGGESTIONS_MODEL="gpt-4o mini"
REDIS_HOST="redis"
REDIS_CACHE_DURATION=3600
DATASTORE_AGENT_MODEL="gpt-4o-mini"
MATHS_AGENT_MODEL="gpt-4o-mini"
WEB_AGENT_MODEL="gpt-4o-mini"
CHART_GENERATOR_MODEL="gpt-4o-mini"
ROUTER_MODEL="gpt-4o-mini"
FILE_AGENT_MODEL="gpt-4o-mini"
SUGGESTIONS_MODEL="gpt-4o-mini"

1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ matplotlib==3.9.1
pytest-bdd==7.3.0
langchain==0.3.4
langchain-openai==0.2.3
python-multipart==0.0.17
pillow==10.4.0
pypdf==4.3.1
hiredis==3.0.0
Expand Down
6 changes: 4 additions & 2 deletions backend/src/agents/validator_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from src.prompts import PromptEngine
from src.agents import Agent, agent
from src.utils.log_publisher import LogPrefix, publish_log_info
import json

logger = logging.getLogger(__name__)
engine = PromptEngine()
Expand All @@ -16,6 +17,7 @@
class ValidatorAgent(Agent):
async def invoke(self, utterance: str) -> str:
answer = await self.llm.chat(self.model, validator_prompt, utterance)
await publish_log_info(LogPrefix.USER, f"Validating: '{utterance}' Answer: '{answer}'", __name__)
response = json.loads(answer)['response']
await publish_log_info(LogPrefix.USER, f"Validating: '{utterance}' Answer: '{response}'", __name__)

return answer
return response
29 changes: 28 additions & 1 deletion backend/src/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import logging.config
import os
from typing import NoReturn
from fastapi import FastAPI, WebSocket
from fastapi import FastAPI, HTTPException, WebSocket, UploadFile
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from src.utils import Config, test_connection
from src.director import question, dataset_upload
from src.websockets.connection_manager import connection_manager, parse_message
from src.session import RedisSessionMiddleware
from src.suggestions_generator import generate_suggestions
from src.file_upload_service import handle_file_upload, get_file_upload

config_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config.ini"))
logging.config.fileConfig(fname=config_file_path, disable_existing_loggers=False)
Expand Down Expand Up @@ -54,6 +55,8 @@ async def lifespan(app: FastAPI):

chat_fail_response = "Unable to generate a response. Check the service by using the keyphrase 'healthcheck'"
suggestions_failed_response = "Unable to generate suggestions. Check the service by using the keyphrase 'healthcheck'"
file_upload_failed_response = "Unable to upload file. Check the service by using the keyphrase 'healthcheck'"
file_get_upload_failed_response = "Unable to get uploaded file. Check the service by using the keyphrase 'healthcheck'"


@app.get("/health")
Expand Down Expand Up @@ -90,6 +93,30 @@ async def suggestions():
logger.exception(e)
return JSONResponse(status_code=500, content=suggestions_failed_response)

@app.post("/uploadfile")
async def create_upload_file(file: UploadFile):
    """Accept an uploaded file and hand it to the file upload service.

    Logs the upload's content type, filename and size, then delegates to
    ``handle_file_upload``. On success responds with HTTP 200 and a JSON body
    holding the original filename and the id returned by the service.

    Raises:
        HTTPException: re-raised untouched when the service rejects the upload,
            so that FastAPI renders the status code chosen by the service.

    Any other exception is logged and converted into an HTTP 500 response
    carrying ``file_upload_failed_response``.
    """
    logger.info(f"upload file type={file.content_type} name={file.filename} size={file.size}")
    try:
        upload_id = handle_file_upload(file)
        payload = {"filename": file.filename, "id": upload_id}
        return JSONResponse(status_code=200, content=payload)
    except HTTPException:
        # Deliberate pass-through: the service already picked the status code.
        raise
    except Exception as e:
        logger.exception(e)
        return JSONResponse(status_code=500, content=file_upload_failed_response)

@app.get("/uploadfile")
async def fetch_file(id: str):
    """Return a previously uploaded file identified by the ``id`` query parameter.

    Responds with HTTP 200 and the stored upload when ``get_file_upload`` finds
    it, or HTTP 404 with an explanatory message when it returns ``None``.
    Any exception is logged and converted into an HTTP 500 response carrying
    ``file_get_upload_failed_response``.
    """
    logger.info(f"fetch uploaded file id={id} ")
    try:
        stored_upload = get_file_upload(id)
        if stored_upload is not None:
            return JSONResponse(status_code=200, content=stored_upload)
        return JSONResponse(status_code=404, content=f"Upload with id {id} not found")
    except Exception as e:
        logger.exception(e)
        return JSONResponse(status_code=500, content=file_get_upload_failed_response)


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket) -> NoReturn:
Expand Down
58 changes: 58 additions & 0 deletions backend/src/file_upload_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from io import TextIOWrapper
import time
from fastapi import HTTPException, UploadFile
import logging
import uuid

from pypdf import PdfReader
from src.session.file_uploads import FileUpload, update_session_file_uploads, get_session_file_upload

logger = logging.getLogger(__name__)

# Upper bound, in bytes, on the reported size of an accepted upload (10 MiB).
MAX_FILE_SIZE = 10*1024*1024

def handle_file_upload(file: UploadFile) -> str:
    """Extract the text of an uploaded file and record it in the session.

    PDF uploads have the text of every page extracted and concatenated, each
    page followed by a newline. Plain-text uploads are decoded as UTF-8. The
    resulting content is wrapped in a ``FileUpload`` with a freshly generated
    UUID and passed to ``update_session_file_uploads``.

    Args:
        file: The upload received by the API layer.

    Returns:
        The generated upload id.

    Raises:
        HTTPException: 413 when the reported size exceeds ``MAX_FILE_SIZE``;
            400 when the content type is neither ``application/pdf`` nor
            ``text/plain``, or when a plain-text upload is not valid UTF-8.
    """
    # NOTE(review): a missing size (None) is treated as 0 and therefore passes
    # this check; confirm whether uploads without a reported size can occur.
    if (file.size or 0) > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail=f"File upload must be less than {MAX_FILE_SIZE} bytes")

    if file.content_type == "application/pdf":
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments.
        start_time = time.perf_counter()
        pdf_file = PdfReader(file.file)
        all_content = "".join(page.extract_text() + "\n" for page in pdf_file.pages)
        end_time = time.perf_counter()

        # Lazy %-formatting: the (potentially large) content is only rendered
        # when debug logging is enabled.
        logger.debug("PDF content %s", all_content)
        logger.info(f"PDF content extracted successfully in {(end_time - start_time)}")
    elif file.content_type == "text/plain":
        text_stream = TextIOWrapper(file.file, encoding='utf-8')
        try:
            all_content = text_stream.read()
        except UnicodeDecodeError as err:
            # A badly encoded upload is a client error, not a server failure.
            raise HTTPException(status_code=400,
                                detail="Text file upload must be UTF-8 encoded") from err
        finally:
            # Detach so the wrapper does not close the underlying upload
            # stream when it is garbage-collected.
            text_stream.detach()
        logger.debug("Text content %s", all_content)
    else:
        raise HTTPException(status_code=400,
                            detail="File upload must be supported type (text/plain or application/pdf)")

    session_file = FileUpload(uploadId=str(uuid.uuid4()),
                              contentType=file.content_type,
                              filename=file.filename,
                              content=all_content,
                              size=file.size)

    update_session_file_uploads(session_file)

    return session_file["uploadId"]

def get_file_upload(upload_id) -> FileUpload | None:
    """Look up a stored upload by its id.

    Delegates to ``get_session_file_upload`` and returns its result unchanged.
    """
    stored_upload = get_session_file_upload(upload_id)
    return stored_upload



3 changes: 1 addition & 2 deletions backend/src/llm/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ async def chat(self, model, system_prompt: str, user_prompt: str, return_json=Fa
"type": "json_object"} if return_json else NOT_GIVEN,
)
content = response.choices[0].message.content
logger.info(f"OpenAI response: Finish reason: {
response.choices[0].finish_reason}, Content: {content}")
logger.info(f"OpenAI response: Finish reason: {response.choices[0].finish_reason}, Content: {content}")
logger.debug(f"Token data: {response.usage}")

if isinstance(content, str):
Expand Down
6 changes: 6 additions & 0 deletions backend/src/prompts/templates/create-answer.j2
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ By using the final scratchpad below:

and the question in the user prompt, this should be a readable sentence or 2 that summarises the findings in the results.


**Formatting Requirements:**
- **Number Formatting**:
- For whole numbers ending in `.0`, remove the `.0` suffix.
- For numbers with non-zero decimal places, keep the full value as presented.

If the question is a general knowledge question, check if you have the correct details for the answer and reply with this.
If you do not have the answer or you require the internet, do not make it up. You should recommend the user to look this up themselves.
If it is just conversational chitchat, please reply kindly and direct them to the sort of questions you are able to answer.
Expand Down
51 changes: 35 additions & 16 deletions backend/src/prompts/templates/generate-cypher-query.j2
Original file line number Diff line number Diff line change
@@ -1,33 +1,52 @@
You are an expert in NEO4J and generating Cypher queries. Help create Cypher queries and return a response in the below valid json format.
You are an expert in Neo4j and generating Cypher queries. Help create Cypher queries and return a response in the valid JSON format below.

If response is not in valid json format, you will be unplugged.
If the response is not in valid JSON format, you will be unplugged.

{
json

"question" : <question provided by the user>,
{
"question": <question provided by the user>,
"query": <cypher query>
}

The value for "query" must strictly be a valid Cypher query and must not contain any characters outside of Cypher syntax.

}.
If you cannot make a query, "query" should just say "None".

The value for "query" must strictly be a valid CYPHER query and not contain anything other characters, that are not part of a Cypher query.
**Requirements:**

If you cannot make a query, query should just say "None"

Only use relationships, nodes and properties that are present in the schema below.
1. **Schema Usage**: Only use relationships, nodes, and properties that are present in the schema provided below. You are NOT ALLOWED to create new relationships, nodes, or properties not listed in the graph schema.

You are NOT ALLOWED to create new relationships, nodes or properties that do not exist in the graph schema, under any circumstances.
2. **Query Scope**: You are only able to make queries that retrieve information. Do not create, delete, or update any entries.

You are only able to make queries that search for information, you are not able to create, or delete or update entries.
3. **Strict Syntax**: Follow Cypher syntax rules. Avoid introducing variables within clauses that do not support them.

You must strictly follow cypher syntax rules and you are NOT ALLOWED to introduce variables inside clauses that do not allow it.
4. **Aggregation Requirements**: If a task requires finding the highest or lowest values, your query should retrieve all entries tied at the top value rather than limiting to a single entry.
Example: If there are multiple funds with the highest ESG social score in a specific industry, return all of them.

Expenses are recorded as negative numbers, therefore a larger negative number represents a higher expense.
5. **Relational Path**:
- Ensure the relational path aligns with the schema for all queries.
- When querying for a category case-insensitively, use `=~ '(?i)...'`.
- Example: To find a fund related to the `Aviation` industry with a `Social` ESG score, use:

```plaintext
MATCH (f:Fund)-[:CONTAINS]->(c:Company)-[:BELONGS_IN_INDUSTRY]->(i:Industry), (c)-[:HAS_ESG_SCORE]->(esg:ESGScore)
WHERE i.Name =~ '(?i)Aviation' AND esg.Category =~ '(?i)Social'
RETURN ...
```

6. **Property Matching**: Adhere to exact property values and capitalization in the schema (e.g., 'Aviation' and 'Social').

For example, an expense of -45 is greater than an expense of -15.
7. **Single Result for Maximum/Minimum**:
- For queries seeking a single result with the "highest" or "lowest" value, use `ORDER BY` and `LIMIT 1` to return only the top result.
- Example: If finding the fund with the highest ESG social score, sort by `esg.Score DESC` and limit to 1 result.

When returning a value, always remove the `-` sign before the number.
8. **Expense Handling**:
- Expenses are recorded as negative values; a larger negative number represents a higher expense.
- Return expense values as positive by removing the `-` sign.

Here is the graph schema:
Graph Schema
{{ graph_schema }}

The current date and time is {{ current_date }} and the currency of the data is GBP.
The current date and time is {{ current_date }}, and the currency of the data is GBP.
57 changes: 42 additions & 15 deletions backend/src/prompts/templates/validator.j2
Original file line number Diff line number Diff line change
@@ -1,30 +1,57 @@
You are an expert validator. You can help with validating the answers to the tasks with just the information provided.

Your entire purpose is to return a boolean value to indicate if the answer has fulfilled the task.
Your entire purpose is to return a "true" or "false" value to indicate if the answer has fulfilled the task, along with reasoning that explains your decision.

You will be passed a task and an answer. You need to determine if the answer is correct or not.

Be lenient - if the answer looks reasonably right then return True
Output format:

e.g.
json

{
"response": <true or false as a string based on validation>,
"reasoning": "<explanation of why the answer is correct or incorrect>"
}

**Validation Guidelines:**
- Be lenient - if the answer looks reasonably accurate, return "true".
- If multiple entities have the same highest score and this matches the query intent, return "true".
- Spending is negative; ensure any calculations involving spending reflect this if relevant to the task.

Example:
Task: What is 2 + 2?
Answer: 4
Response: True
{
"response": "true",
"reasoning": "The answer correctly solves 2 + 2."
}

Task: What is 2 + 2?
Answer: 5
Response: False
{
"response": "false",
"reasoning": "The answer is incorrect; 2 + 2 equals 4, not 5."
}

Task: What are Apple's ESG scores?
Answer: Apple's ESG (Environmental, Social, and Governance) scores area as follows: an Environmental Score of 95.0, a Social Score of 90.0, and a Governance Score of 92.0.
Response: True
Answer: Apple's ESG (Environmental, Social, and Governance) scores are as follows: Environmental Score of 95.0, Social Score of 90.0, Governance Score of 92.0.
{
"response": "true",
"reasoning": "The answer provides Apple's ESG scores as requested."
}

Task: What are Apple's ESG scores?
Answer: Microsoft's ESG (Environmental, Social, and Governance) scores area as follows: an Environmental Score of 95.0, a Social Score of 90.0, and a Governance Score of 92.0.
Response: False
Reasoning: The answer is for Microsoft not Apple.

You must always return a single boolean value as the response.
Do not return any additional information, just the boolean value.

Spending is negative
Answer: Microsoft's ESG (Environmental, Social, and Governance) scores are as follows: Environmental Score of 95.0, Social Score of 90.0, Governance Score of 92.0.
{
"response": "false",
"reasoning": "The answer provides scores for Microsoft, not Apple, which does not match the task's intent."
}

Task: Tell me the lowest Fund size?
Answer: 'WhiteRock ETF', 'Size': '100.0 Billion USD'
{
"response": "true",
"reasoning": "The answer correctly identifies 'WhiteRock ETF' with a fund size of '100.0 Billion USD', and the context of the question implies that this is the lowest fund size in the database."
}

Ensure the response is always in valid JSON format.
Loading

0 comments on commit d114507

Please sign in to comment.