diff --git a/backend/README.md b/backend/README.md index 644e66d18..a94520d69 100644 --- a/backend/README.md +++ b/backend/README.md @@ -3,6 +3,7 @@ Python service for the InferESG backend. ## Structure + - `/supervisors` storing all supervisor modules (agents that call other agents for a greater goal) - `/agents` containing all agents the director can call. Agents have their own functions stored within the agent module - `/utils` with all shared utility modules @@ -15,6 +16,7 @@ Python service for the InferESG backend. > Note: You will need to configure a LLM to run the service. This README covers instructions on how to run the application: + - Locally - In a Docker Container @@ -30,14 +32,16 @@ Follow the instructions below to run the backend locally. Change directory to `/ 1. Set-up a virtual environment -> It's recommended (though not technically required) to create a virtual environment for the project by running `python -m venv .venv` to create it and `.venv/Scripts/activate` to activate it in your active terminal. +> It's recommended (though not technically required) to create a virtual environment for the project by running `python -m venv venv` to create it and `source venv/bin/activate` (macOS/Linux) or `venv\Scripts\activate` (Windows) to activate it in your active terminal. 2. Install dependencies ```bash pip install -r requirements.txt ``` -> (VsCode) You may run into some issues with compiling python packages from requirements.txt. To resolve this ensure you have downloaded and installed the "Desktop development with C++" workload from your Visual Studio installer. + +> (VsCode) You may run into some issues with compiling python packages from requirements.txt. To resolve this ensure you have downloaded and installed the "Desktop development with C++" workload from your Visual Studio installer. Details on this can be found at https://matplotlib.org/devdocs/install/dependencies.html#compiled-extensions + 3. 
Run the app ```bash @@ -104,7 +108,7 @@ To add further rules, these are added to `ruff.toml` by using the letter asssign ## Test -`pytest` is being used for testing the backend. Like with linting, running the [setup](#set-up) steps should download `pytest` for you. +`pytest` is being used for testing the backend. Like with linting, running the [setup](#set-up) steps should download `pytest` for you. We are using a separate `tests` directory to store all the tests. This directory is intended to mirror the `src` directory to make it easier to find the tests for a specific module. @@ -130,6 +134,6 @@ The vscode extension [Pylance](https://marketplace.visualstudio.com/items?itemNa ```json { - "python.analysis.typeCheckingMode": "basic", + "python.analysis.typeCheckingMode": "basic" } -``` \ No newline at end of file +``` diff --git a/backend/src/api/app.py b/backend/src/api/app.py index d7089ff63..8c78cbc09 100644 --- a/backend/src/api/app.py +++ b/backend/src/api/app.py @@ -12,7 +12,7 @@ from src.utils import Config, test_connection from src.director import question from src.websockets.connection_manager import connection_manager, parse_message -from src.utils.annual_cypher_import import annual_transactions_cypher_script +from src.utils.cyper_import_data_from_csv import import_data_from_csv_script config_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config.ini")) logging.config.fileConfig(fname=config_file_path, disable_existing_loggers=False) @@ -36,10 +36,10 @@ async def lifespan(app: FastAPI): blob_client = container_client.get_blob_client(config.azure_initial_data_filename) download_stream = blob_client.download_blob() annual_transactions = download_stream.readall().decode("utf-8") - populate_db(annual_transactions_cypher_script, json.loads(annual_transactions)) + populate_db(import_data_from_csv_script, json.loads(annual_transactions)) except Exception as e: logger.exception(f"Failed to populate database with initial data from Azure: {e}") 
- populate_db(annual_transactions_cypher_script, {}) + populate_db(import_data_from_csv_script, {}) yield diff --git a/backend/src/utils/annual_cypher_import.py b/backend/src/utils/annual_cypher_import.py deleted file mode 100644 index b6fc06335..000000000 --- a/backend/src/utils/annual_cypher_import.py +++ /dev/null @@ -1,49 +0,0 @@ -annual_transactions_cypher_script = """ -WITH $data AS data -UNWIND data.all_data[0..1] as info - -FOREACH (_ IN CASE WHEN info.account.display_name IS NOT NULL THEN [1] ELSE [] END | - MERGE (a:Account {name:info.account.display_name}) - FOREACH (transactions IN info.transactions | - FOREACH (t IN transactions | - MERGE (transaction:Transaction {id: t.transaction_id}) - ON CREATE SET - transaction.amount = t.amount, - transaction.description = t.description, - transaction.date = datetime(t.timestamp), - transaction.type = t.transaction_type - MERGE (transaction)-[:PAID_BY]->(a) - - FOREACH (_ IN CASE WHEN t.merchant_name IS NOT NULL THEN [1] ELSE [] END | - MERGE (merchant:Merchant {name: t.merchant_name}) - MERGE (transaction)-[:PAID_TO]->(merchant) - ) - - FOREACH (_ IN CASE WHEN size(t.transaction_classification) = 0 THEN [1] ELSE [] END | - MERGE (uncategorized:Classification {name: "Uncategorized"}) - MERGE (transaction)-[:CLASSIFIED_AS]->(uncategorized) - ) - - FOREACH (payment_classification IN t.transaction_classification | - MERGE (classification:Classification {name:payment_classification}) - MERGE (transaction)-[:CLASSIFIED_AS]->(classification) - ) - ) - ) -) -""" - -remove_credits = """ -MATCH (n:Transaction {type: 'CREDIT'}) -DETACH DELETE n -""" - -remove_transactions_without_merchant = """ -MATCH (n:Transaction)-[r:PAID_TO]->(a:Merchant) -WHERE a IS NULL -DETACH DELETE n -""" - -remove_connecting_nodes = """ -MATCH (n) WHERE NOT (n)--() DELETE (n) -""" diff --git a/backend/src/utils/cyper_import_data_from_csv.py b/backend/src/utils/cyper_import_data_from_csv.py new file mode 100644 index 000000000..96ec1be3b --- 
/dev/null +++ b/backend/src/utils/cyper_import_data_from_csv.py @@ -0,0 +1,28 @@ +import_data_from_csv_script = """LOAD CSV WITH HEADERS FROM 'file:///esg_poc.csv' AS row + +MERGE (f:Fund {Name: row.`Fund Name`, Size:row.`Fund Size (Billion USD)`}) + +MERGE (c:Company {Name: row.`Company Name`}) + +MERGE (c)<-[:CONTAINS]-(f) + +MERGE (i:Industry {Name: row.Industry}) + +MERGE (c)-[:BELONGS_IN_INDUSTRY]->(i) + +MERGE (co:Country {Name: row.Country}) + +MERGE (c)-[:REGISTERED_IN]->(co) + +MERGE (esge:ESGScore {Category: 'Environmental', Score: row.`ESG score (Environmental)`, Date: row.`ESG scoring date`}) + +MERGE (c)-[:HAS_ESG_SCORE]->(esge) + +MERGE (esgs:ESGScore {Category: 'Social', Score: row.`ESG score (Social)`, Date: row.`ESG scoring date`}) + +MERGE (c)-[:HAS_ESG_SCORE]->(esgs) + +MERGE (esgg:ESGScore {Category: 'Governance', Score: row.`ESG score (Governance)`, Date: row.`ESG scoring date`}) + +MERGE (c)-[:HAS_ESG_SCORE]->(esgg) +""" diff --git a/backend/src/utils/graph_db_utils.py b/backend/src/utils/graph_db_utils.py index f09003085..529363e69 100644 --- a/backend/src/utils/graph_db_utils.py +++ b/backend/src/utils/graph_db_utils.py @@ -1,7 +1,6 @@ import logging from neo4j import GraphDatabase from src.utils import Config -from src.utils.annual_cypher_import import remove_connecting_nodes, remove_transactions_without_merchant, remove_credits logger = logging.getLogger(__name__) @@ -52,15 +51,6 @@ def populate_db(query, data) -> None: session.run(query, data=data) logger.debug("Database populated") - - session.run(remove_credits) - logger.debug("Removed any credits from database") - - session.run(remove_transactions_without_merchant) - logger.debug("Removed transactions without merchant from database") - - session.run(remove_connecting_nodes) - logger.debug("Removed connecting nodes to transactions without merchants") except Exception as e: logger.exception(f"Error: {e}") raise diff --git a/backend/tests/api/app_test.py b/backend/tests/api/app_test.py 
index c70f9ff01..1bc9ea0ac 100644 --- a/backend/tests/api/app_test.py +++ b/backend/tests/api/app_test.py @@ -66,8 +66,8 @@ def test_chat_response_failure(mocker): @pytest.mark.asyncio async def test_lifespan_populates_db(mocker, mock_initial_data) -> None: mock_populate_db = mocker.patch("src.api.app.populate_db", return_value=mocker.Mock()) - mock_annual_transactions_cypher_script = mocker.patch( - "src.api.app.annual_transactions_cypher_script", return_value=(mocker.Mock()) + mock_import_data_from_csv_script = mocker.patch( + "src.api.app.import_data_from_csv_script", return_value=(mocker.Mock()) ) mock_config = { "azure_initial_data_filename": "test_file", @@ -77,16 +77,16 @@ async def test_lifespan_populates_db(mocker, mock_initial_data) -> None: mocker.patch("src.api.app.config", return_value=mock_config) with client: - mock_populate_db.assert_called_once_with(mock_annual_transactions_cypher_script, mock_initial_data) + mock_populate_db.assert_called_once_with(mock_import_data_from_csv_script, mock_initial_data) @pytest.mark.asyncio async def test_lifespan_missing_config_populates_db(mocker) -> None: mock_populate_db = mocker.patch("src.api.app.populate_db", return_value=mocker.Mock()) - mock_annual_transactions_cypher_script = mocker.patch( - "src.api.app.annual_transactions_cypher_script", return_value=(mocker.Mock()) + mock_import_data_from_csv_script = mocker.patch( + "src.api.app.import_data_from_csv_script", return_value=(mocker.Mock()) ) mocker.patch("src.api.app.config", None) with client: - mock_populate_db.assert_called_once_with(mock_annual_transactions_cypher_script, {}) + mock_populate_db.assert_called_once_with(mock_import_data_from_csv_script, {}) diff --git a/backend/tests/utils/graph_db_utils_test.py b/backend/tests/utils/graph_db_utils_test.py index bfb40b49b..9721e0c89 100644 --- a/backend/tests/utils/graph_db_utils_test.py +++ b/backend/tests/utils/graph_db_utils_test.py @@ -53,21 +53,11 @@ def test_populate_db_populates_db(mocker, 
mock_driver, mock_session): query = "CREATE (n:Test {data: $all_data})" data = {"key": "value"} - remove_credits = "REMOVE CREDITS" - remove_transactions_without_merchant = "REMOVE TRANSACTIONS" - remove_connecting_nodes = "REMOVE NODES" - - mocker.patch("src.utils.graph_db_utils.remove_credits", remove_credits) - mocker.patch("src.utils.graph_db_utils.remove_transactions_without_merchant", remove_transactions_without_merchant) - mocker.patch("src.utils.graph_db_utils.remove_connecting_nodes", remove_connecting_nodes) populate_db(query, data) mock_session.run.assert_any_call("MATCH (n) DETACH DELETE n") mock_session.run.assert_any_call(query, data={"all_data": data}) - mock_session.run.assert_any_call(remove_credits) - mock_session.run.assert_any_call(remove_transactions_without_merchant) - mock_session.run.assert_any_call(remove_connecting_nodes) mock_driver.session.return_value.__exit__.assert_called_once() mock_driver.close.assert_called_once() diff --git a/compose.yml b/compose.yml index 6780149f6..762413fbb 100644 --- a/compose.yml +++ b/compose.yml @@ -10,7 +10,10 @@ networks: services: # neo4j service neo4j-db: - image: neo4j:latest + image: inferesg/data + build: + context: data + dockerfile: ./Dockerfile networks: - network environment: diff --git a/data/Dockerfile b/data/Dockerfile index 774591a38..ae6624179 100644 --- a/data/Dockerfile +++ b/data/Dockerfile @@ -1,5 +1,5 @@ # Choose our version of Node -FROM neo4j:5.19.0 +FROM neo4j:latest # Set up a working directory WORKDIR /data @@ -7,6 +7,9 @@ WORKDIR /data # Copy the data content into the working directory COPY . 
/data +# Copy the datasets into an import folder +COPY /datasets/* /import/ + # Expose port for writing to the database and viewing the graph EXPOSE 7474 EXPOSE 7687 diff --git a/data/datasets/esg_poc.csv b/data/datasets/esg_poc.csv new file mode 100644 index 000000000..7e085db7e --- /dev/null +++ b/data/datasets/esg_poc.csv @@ -0,0 +1,36 @@ +Fund Name,Fund Size (Billion USD),Company Name,Industry,Country,ESG score (Environmental),ESG score (Social),ESG score (Governance),ESG scoring date +WhiteRock ETF,100,Ryanair,Aviation,Ireland,90,72,88,2023-01-01 +WhiteRock ETF,100,EasyJet,Aviation,UK,60,70,88,2023-01-01 +WhiteRock ETF,100,Turner Construction,Construction,USA,50,60,85,2023-01-01 +WhiteRock ETF,100,Bechtel,Construction,USA,55,50,60,2023-01-01 +WhiteRock ETF,100,Lufthansa,Aviation,Germany,85,78,80,2023-01-01 +WhiteRock ETF,100,KLM,Aviation,Netherlands,75,82,77,2023-01-01 +WhiteRock ETF,100,Shell,Energy,Netherlands,72,78,74,2023-01-01 +WhiteRock ETF,100,Chevron,Energy,USA,69,74,71,2023-01-01 +WhiteRock ETF,100,TotalEnergies,Energy,France,71,76,73,2023-01-01 +Dynamic Industries Fund,350,Turner Construction,Construction,USA,50,60,85,2023-01-01 +Dynamic Industries Fund,350,Bechtel,Construction,USA,55,50,60,2023-01-01 +Dynamic Industries Fund,350,Skanska,Construction,Sweden,65,70,68,2023-01-01 +Dynamic Industries Fund,350,Vinci,Construction,France,70,75,72,2023-01-01 +Dynamic Industries Fund,350,Nvidia,Technology,USA,75,88,90,2023-01-01 +Dynamic Industries Fund,350,Apple,Technology,USA,95,90,92,2023-01-01 +Dynamic Industries Fund,350,Microsoft,Technology,USA,80,89,91,2023-01-01 +Dynamic Industries Fund,350,Google,Technology,USA,81,91,93,2023-01-01 +Silverman Global ETF,400,Bechtel,Construction,USA,55,50,60,2023-01-01 +Silverman Global ETF,400,Turner Construction,Construction,USA,50,60,85,2023-01-01 +Silverman Global ETF,400,Shell,Energy,Netherlands,72,78,74,2023-01-01 +Silverman Global ETF,400,Chevron,Energy,USA,69,74,71,2023-01-01 +Silverman Global 
ETF,400,TotalEnergies,Energy,France,71,76,73,2023-01-01 +Silverman Global ETF,400,Lufthansa,Aviation,Germany,85,78,80,2023-01-01 +Silverman Global ETF,400,KLM,Aviation,Netherlands,75,82,77,2023-01-01 +Silverman Global ETF,400,British Airways,Aviation,UK,80,85,83,2023-01-01 +Global Energy Fund,1500,ExxonMobil,Energy,USA,70,75,72,2023-01-01 +Global Energy Fund,1500,BP,Energy,UK,68,73,70,2023-01-01 +Global Energy Fund,1500,Shell,Energy,Netherlands,72,78,74,2023-01-01 +Global Energy Fund,1500,Chevron,Energy,USA,69,74,71,2023-01-01 +Global Energy Fund,1500,TotalEnergies,Energy,France,71,76,73,2023-01-01 +European Growth Fund,500,Shell,Energy,Netherlands,72,78,74,2023-01-01 +European Growth Fund,500,TotalEnergies,Energy,France,71,76,73,2023-01-01 +European Growth Fund,500,Skanska,Construction,Sweden,65,70,68,2023-01-01 +European Growth Fund,500,Vinci,Construction,France,70,75,72,2023-01-01 +European Growth Fund,500,British Airways,Aviation,UK,80,85,83,2023-01-01 \ No newline at end of file