Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: FS-121 Materiality Agent #50

Merged
merged 25 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5df7722
first pass
IMladjenovic Dec 5, 2024
733f340
Added checks for file_path and file_stream
dianaPrahoveanu-SL Dec 5, 2024
1701505
rework
IMladjenovic Dec 6, 2024
1c72cc3
rework agents with ChatAgent. improve logic around file upload
IMladjenovic Dec 9, 2024
99e386e
Create materiality agent plus refactors
IMladjenovic Dec 9, 2024
fd273c8
fix materiality agent response. add materiality topics to front end
IMladjenovic Dec 12, 2024
e0e29be
fix pytest
IMladjenovic Dec 10, 2024
fd5ebd8
fix existing tests
IMladjenovic Dec 12, 2024
41dbd71
enable promptfoo tests to handle files. add list_materiality_topics p…
IMladjenovic Dec 11, 2024
e8699a3
create promptfoo tests to select materiality documents. fix non file …
IMladjenovic Dec 11, 2024
a7b8504
add json instructions back to dkg prompts to keep promptfoo tests wor…
IMladjenovic Dec 11, 2024
1699bdc
PR cleanup
IMladjenovic Dec 11, 2024
07c05ff
add materiality agent tests
IMladjenovic Dec 11, 2024
21e70ad
update openai and tests
IMladjenovic Dec 12, 2024
c16069c
remove caching file ids in Redis for now, this can be implemented and…
IMladjenovic Dec 12, 2024
4b7ac06
remove agent annotations on non ChatAgent Agents
IMladjenovic Dec 12, 2024
71140e7
add promptfoo testing to report agent find company name
IMladjenovic Dec 12, 2024
5557650
PR cleanup
IMladjenovic Dec 12, 2024
8f17464
rework agent decorator and agent tests
IMladjenovic Dec 12, 2024
54425a7
fix tests
IMladjenovic Dec 12, 2024
a15a88b
fix failing tests
IMladjenovic Dec 12, 2024
7e361f5
fix materiality agent tests
IMladjenovic Dec 12, 2024
8794d8d
update materiality agent test
IMladjenovic Dec 12, 2024
1f36740
remove name, description from Agent, make decorator for ChatAgent only
IMladjenovic Dec 13, 2024
ec8de99
rework LLM LLMFile logic
IMladjenovic Dec 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ WS_URL=ws://localhost:8250/ws
ANSWER_AGENT_LLM="mistral"
INTENT_AGENT_LLM="openai"
REPORT_AGENT_LLM="mistral"
MATERIALITY_AGENT_LLM="openai"
VALIDATOR_AGENT_LLM="openai"
DATASTORE_AGENT_LLM="openai"
WEB_AGENT_LLM="openai"
Expand All @@ -52,6 +53,7 @@ DYNAMIC_KNOWLEDGE_GRAPH_LLM="openai"
ANSWER_AGENT_MODEL="mistral-large-latest"
INTENT_AGENT_MODEL="gpt-4o-mini"
REPORT_AGENT_MODEL="mistral-large-latest"
MATERIALITY_AGENT_MODEL="gpt-4o-mini"
VALIDATOR_AGENT_MODEL="gpt-4o-mini"
DATASTORE_AGENT_MODEL="gpt-4o-mini"
WEB_AGENT_MODEL="gpt-4o-mini"
Expand Down
5 changes: 4 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ WORKDIR /backend
# Copy just the requirements into the working directory so it gets cached by itself
COPY ./requirements.txt ./requirements.txt

# Copy the datasets directory, this should match what local run of application will need
# Copy the datasets directory
COPY ./datasets/ ./datasets/

# Copy the library directory
COPY ./library/ ./library/

# Install the dependencies from the requirements file
RUN pip install --no-cache-dir --upgrade -r /backend/requirements.txt

Expand Down
1 change: 1 addition & 0 deletions backend/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest
import os


@pytest.hookimpl(tryfirst=True)
def pytest_configure(config):
# Set an environment variable to indicate pytest is running
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
23 changes: 23 additions & 0 deletions backend/library/catalogue.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"library": {
"TFND": [
{
"name": "Additional-Sector-Guidance-Biotech-and-Pharma.pdf",
"sector-label": "Biotechnology and Pharmaceuticals",
"esg-labels": ["Environment", "Nature"]
},
{
"name": "Additional-Sector-Guidance-Oil-and-gas.pdf",
"sector-label": "Oil and Gas",
"esg-labels": ["Environment", "Nature"]
}
],
"GRI": [
{
"name": "GRI 11_ Oil and Gas Sector 2021.pdf",
"sector-label": "Oil and Gas",
"esg-labels": ["Environment", "Social", "Governance"]
}
]
}
}

Large diffs are not rendered by default.

26 changes: 0 additions & 26 deletions backend/promptfoo/dynamic_knowledge_graph_cypher_config.yaml

This file was deleted.

56 changes: 56 additions & 0 deletions backend/promptfoo/materiality_agent_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
description: "Test Materiality Agent Prompts"

providers:
- id: openai:gpt-4o
config:
temperature: 0

prompts: file://promptfoo_test_runner.py:create_prompt

tests:
- description: "test select material documents for BP"
vars:
user_prompt: "BP"
system_prompt_template: "select-material-files-system-prompt"
system_prompt_args:
catalogue: '{"library":{"TFND":[{"name":"Additional-Sector-Guidance-Biotech-and-Pharma.pdf","sector-label":"Biotechnology and Pharmaceuticals","esg-labels":["Environment","Nature"]},{"name":"Additional-Sector-Guidance-Oil-and-gas.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Nature"]}],"GRI":[{"name":"GRI 11_ Oil and Gas Sector 2021.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Social","Governance"]}]}}'
assert:
- type: javascript
value: JSON.parse(output).files[0] === "Additional-Sector-Guidance-Oil-and-gas.pdf"
- type: javascript
value: JSON.parse(output).files[1] === "GRI 11_ Oil and Gas Sector 2021.pdf"

- description: "test select material documents for BP with focus on nature"
vars:
user_prompt: "BP with focus on Nature materiality topics"
system_prompt_template: "select-material-files-system-prompt"
system_prompt_args:
catalogue: '{"library":{"TFND":[{"name":"Additional-Sector-Guidance-Biotech-and-Pharma.pdf","sector-label":"Biotechnology and Pharmaceuticals","esg-labels":["Environment","Nature"]},{"name":"Additional-Sector-Guidance-Oil-and-gas.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Nature"]}],"GRI":[{"name":"GRI 11_ Oil and Gas Sector 2021.pdf","sector-label":"Oil and Gas","esg-labels":["Environment","Social","Governance"]}]}}'
assert:
- type: javascript
value: JSON.parse(output).files[0] === "Additional-Sector-Guidance-Oil-and-gas.pdf"
- type: javascript
value: JSON.parse(output).files.length === 1

- description: "test list material topics for AstraZeneca with file"
vars:
user_prompt: "What topics are material for AstraZeneca?"
system_prompt_template: "list-material-topics-system-prompt"
file_attachment: "../library/Additional-Sector-Guidance-Biotech-and-Pharma.pdf"
assert:
- type: is-json
value:
required: ["material_topics"]
type: object
- type: javascript
value: JSON.parse(output).material_topics["Environmental Stewardship"] === "AstraZeneca, like other companies in the biotechnology and pharmaceuticals sector, has significant dependencies and impacts on natural ecosystems. The company relies on biomass provisioning, genetic material for drug development, and water resources for manufacturing. The management of these dependencies and mitigating environmental impacts such as water and soil pollution is crucial for sustainable operations."
- type: javascript
value: JSON.parse(output).material_topics["Climate Change and GHG Emissions"] === "As part of a sector that is intensive in resource and energy use, managing greenhouse gas emissions and transitioning to sustainable energy sources is essential for AstraZeneca to address climate change risks and opportunities, comply with global regulatory standards and meet the expectations of stakeholders."
- type: javascript
value: JSON.parse(output).material_topics["Product Stewardship and Safety"] === "Given the nature of pharmaceuticals, AstraZeneca must ensure the safe production, handling, and disposal of products, preventing environmental contamination, and addressing the issue of pharmaceuticals in the environment, including environmentally persistent pharmaceutical pollutants (EPPPs)."
- type: javascript
value: JSON.parse(output).material_topics["Supply Chain Management"] === "AstraZeneca sources various inorganic and organic feedstock and raw materials that may pose environmental risks if not managed sustainably. Effective supply chain management, including traceability and engagement with suppliers on nature-related impacts, is essential to minimize dependencies and risks."
- type: javascript
value: JSON.parse(output).material_topics["Biodiversity and Ecosystem Impacts"] === "The potential impact of AstraZeneca's operations on sensitive ecosystems, as well as its reliance on biodiversity for sourcing natural compounds for drug development, highlights the importance of considering biodiversity in the company's sustainability strategy."
- type: javascript
value: JSON.parse(output).material_topics["Pollution Prevention"] === "Managing and reducing pollution, particularly non-GHG air pollutants, wastewater discharges, and hazardous waste, is critical for AstraZeneca to mitigate its environmental footprint and comply with environmental regulations."
12 changes: 12 additions & 0 deletions backend/promptfoo/promptfoo_test_runner.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
import sys
from pypdf import PdfReader
sys.path.append("../")
from src.prompts.prompting import PromptEngine # noqa: E402

engine = PromptEngine()


def read_pdf_file_for_promptfoo(file_path: str) -> str:
    """Return the extracted text of the PDF at ``file_path``.

    Each page's text is extracted in document order and the per-page
    strings are joined with a single newline.
    """
    reader = PdfReader(file_path)
    page_texts = []
    for pdf_page in reader.pages:
        page_texts.append(pdf_page.extract_text())
    return "\n".join(page_texts)


def create_prompt(context):
config = context["vars"]

Expand All @@ -20,4 +29,7 @@ def create_prompt(context):
else:
raise Exception("Must provide either user_prompt or user_prompt_template")

if "file_attachment" in config:
    # Extract the attachment text into a local first: reusing double quotes
    # inside a double-quoted f-string replacement field is a SyntaxError on
    # Python versions before 3.12 (PEP 701), which would break this runner.
    attachment_text = read_pdf_file_for_promptfoo(config["file_attachment"])
    user_prompt = f"{user_prompt}\n\nAttached file: {attachment_text}"

return [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
description: "Test Report Prompt"
description: "Test Report Agent Prompts"

providers:
- id: mistral:mistral-large-latest
Expand All @@ -14,6 +14,116 @@ tests:
system_prompt_template: "create-report-system-prompt"
user_prompt_args:
document_text: "Published September 2024 Carbon Reduction Plan
Supplier name: Amazon Web Services EU SARL (UK Branch) (“AWS UK”)
Publication date: September 30, 2024
Commitment to Achieving Net Zero
AWS UK, as part of Amazon.com, Inc. (“Amazon”), is committed to achieving net -zero
emissions by 2040. In 2019, Amazon co -founded The Climate Pledge, a public commitment
to innovate, use our scale for good and go faster to address the urgency of the climate crisis
to reach net -zero carbon across the entire organization by 2040. Since committing to the
Pledge, we’ve changed how we conduct our business and the running of our operations, and
we’ve increased funding and implementation of new technologies and services that
decarbonize and help preserve the natural world, alon gside the ambitious goals outlined in
The Climate Pledge. We’re fully committed to our goals and our work to build a better planet.
Baseline Emissions Footprint
Base Year emissions are a record of the greenhouse gases that have been produced in the
past an d are the reference point against which emissions reduction can be measured.
Baseline Year: 2020
Additional Details relating to the Baseline Emissions calculations:
AWS UK utilized January 1, 2020 to December 31, 2020 as the baseline year for emissions
reporting under this Carbon Reduction Plan. Our plan includes emissions data from relevant
affiliate companies helping to provide AWS UK’s services to our customers. We ’ve included both
location -based and market -based method Scope 2 emissions in the following tables. AWS UK
benefits from contractual arrangements entered into by our affiliate(s) for renewable electricity
and/or renewable attributes that are reflected in t he market -based data set. More information
about our corporate carbon footprint and methodology can be found on our website .
Our baseline year does not include Scope 1 emissions. In 2022 we updated our methodology
and Scope 1 emissions are now included in total emissions for AWS UK

Published September 2024 Baseline year emissions:
EMISSIONS TOTAL (tCO 2e)
Scope 1 0
Scope 2 61,346 – Location -based method
2,813 – Market -based method
Scope 3 (Included
Sources) 3,770
Total Emissions 65,116 – Location -based method
6,583 – Market -based method
Current Emissions Reporting
Reporting Year: 202 3 (January 1, 202 3 to December 31, 202 3)
EMISSIONS TOTAL (tCO 2e)
Scope 1 2,23 3
Scope 2 126,755 – Location -based method
0 – Market -based method
Scope 3 (Included
Sources) 13,188
Total Emissions 142,17 6 – Location -based method
15,42 1 – Market -based method

Published September 2024 Emissions Reduction Targets
In 2019, we set an ambitious goal to match 100% of the electricity we use with renewable
energy by 2030. This goal includes all data centres , logistics facilities, physical stores, and
corporate offices, as well as on -site charg ing points and our financially integrated subsidiaries.
We are proud to have achieved this goal in 2023, seven years early, with 100% of the electricity
consum ed by Amazon matched with renewable energy sources.
Amazon continue s to be transparent and share our progress to reach net -zero carbon in our
annual Sustainability Report , which also includes details on how we measure carbon .
Carbon Reduction Projects
Completed Carbon Reduction Initiatives
Amazon continues to take actions across our operations to drive carbon reduction around the
world, including in the UK. As of January 202 4, Amazon’s renewable energy portfolio includes
243 wind and solar farms and 2 70 rooftop solar projects, totalling 513 projects and 28
gigawatts of renewable energy capacity. This includes several utility -scale renewable energy
projects located within the UK:
•In 2019, Amazon announced our first power purchase agreement in the UK, located in
Kintyre Peninsula, Scotland. The “Amazon Wind Farm Scotland – Beinn an Tuirc 3”
began o perating in 2021, providing 50 megawatts (MW) of new renewable capacity to
the electricity grid with expected generation of 168,000 megawatt hours (MWh) of
clean energy annually. That’s enough to power 46,000 UK homes every year.
•In December 2020, Amazon a nnounced a two -phase renewable energy project located
in South Lanarkshire, Scotland, the Kennoxhead wind farm. Kennoxhead will be the
largest single -site onshore wind project in the UK, enabled through corporate
procurement. Once fully operational, Kenno xhead will produce 129 MW of renewable
capacity and is expected to generate 439,000 MWh of clean energy annually. Phase 1
(60 MW) began operating in 2022, and Phase 2 (69 MW) will begin operations in 2024 .
•In 2022, Amazon announced its first project in Nor thern Ireland, a 16 MW onshore
windfarm in Co Antrim.
•In 2022, Amazon also announced a new 473 MW offshore wind farm, Moray West,
located off the coast of Scotland . Amazon expects completion of Moray West in 2024.
This is Amazon’s largest project in Scotland and the largest corporate renewable
energy deal announced by any company in the UK to date.
•In 2023, Amazon announced a new 47 MW solar farm, Warl ey located in Essex.
This project is expected to be operational in 2024.

Published September 2024 Declaration and Sign Off
This Carbon Reduction Plan has been completed in accordance with PPN 06/21 and
associated guidance and reporting standard for Carbon Reduction Plans.
Emiss ions have been reported and recorded in accordance with the published reporting
standard for Carbon Reduction Plans and the GHG Reporting Protocol corporate standard1
and uses the appropri ate Government emission conversion factors for greenhouse gas
company reporting2.
Scope 1 and Scope 2 emissions have been reported in accordance with S ECR requirements,
and the required subset of Scope 3 emissions have been reported in accordance with the
published reporting standard for Carbon Reduction Plans and the Corporate Value Chain
(Scope 3) Standard3.
This Carbon Reduction Plan has been reviewed and signed off by the board of directors (or
equivalent management body)."
assert:
- type: contains-all
value:
- "# Basic"
- "# ESG"
- "# Environmental"
- "# Social"
- "# Governance"
- "# Conclusion"

- description: "Test getting company name from file"
vars:
user_prompt_template: "find-company-name-from-file-user-prompt"
system_prompt_template: "find-company-name-from-file-system-prompt"
user_prompt_args:
file_content: "Published September 2024 Carbon Reduction Plan
Supplier name: Amazon Web Services EU SARL (UK Branch) (“AWS UK”)
Publication date: September 30, 2024
Commitment to Achieving Net Zero
Expand Down Expand Up @@ -109,11 +219,9 @@ published reporting standard for Carbon Reduction Plans and the Corporate Value
This Carbon Reduction Plan has been reviewed and signed off by the board of directors (or
equivalent management body)."
assert:
- type: contains-all
- type: is-json
value:
- "# Basic"
- "# ESG"
- "# Environmental"
- "# Social"
- "# Governance"
- "# Conclusion"
required: ["company_name"]
type: object
- type: javascript
value: JSON.parse(output).company_name === "Amazon"
23 changes: 15 additions & 8 deletions backend/src/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List

from src.utils import Config
from src.agents.agent import Agent, agent
from src.agents.agent import Agent, ChatAgent, chat_agent
from src.agents.datastore_agent import DatastoreAgent
from src.agents.web_agent import WebAgent
from src.agents.intent_agent import IntentAgent
Expand All @@ -10,32 +10,37 @@
from src.agents.answer_agent import AnswerAgent
from src.agents.chart_generator_agent import ChartGeneratorAgent
from src.agents.report_agent import ReportAgent
from src.agents.materiality_agent import MaterialityAgent


config = Config()


def get_validator_agent() -> Agent:
def get_validator_agent() -> ChatAgent:
    """Build a ValidatorAgent from the validator LLM and model held in ``config``."""
    llm = config.validator_agent_llm
    model = config.validator_agent_model
    return ValidatorAgent(llm, model)


def get_intent_agent() -> Agent:
def get_intent_agent() -> ChatAgent:
    """Build an IntentAgent from the intent LLM and model held in ``config``."""
    llm = config.intent_agent_llm
    model = config.intent_agent_model
    return IntentAgent(llm, model)


def get_answer_agent() -> Agent:
def get_answer_agent() -> ChatAgent:
    """Build an AnswerAgent from the answer LLM and model held in ``config``."""
    llm = config.answer_agent_llm
    model = config.answer_agent_model
    return AnswerAgent(llm, model)


def get_report_agent() -> Agent:
def get_report_agent() -> ReportAgent:
    """Build a ReportAgent from the report LLM and model held in ``config``."""
    llm = config.report_agent_llm
    model = config.report_agent_model
    return ReportAgent(llm, model)


def agent_details(agent) -> dict:
def get_materiality_agent() -> MaterialityAgent:
    """Build a MaterialityAgent from the materiality LLM and model held in ``config``."""
    llm = config.materiality_agent_llm
    model = config.materiality_agent_model
    return MaterialityAgent(llm, model)


def agent_details(agent: ChatAgent) -> dict:
    """Summarise ``agent`` as a dict with its ``name`` and ``description``."""
    summary = {}
    summary["name"] = agent.name
    summary["description"] = agent.description
    return summary


def get_available_agents() -> List[Agent]:
def get_available_agents() -> List[ChatAgent]:
return [
DatastoreAgent(config.datastore_agent_llm, config.datastore_agent_model),
WebAgent(config.web_agent_llm, config.web_agent_model),
Expand All @@ -49,15 +54,17 @@ def get_agent_details():


__all__ = [
"agent",
"Agent",
"ChatAgent",
"chat_agent",
"agent_details",
"get_agent_details",
"get_answer_agent",
"get_intent_agent",
"get_available_agents",
"get_validator_agent",
"get_report_agent",
"get_materiality_agent",
"Parameter",
"tool",
]
Loading
Loading